From 8053f21675b073b379cbca258ee4a3f3850dfa94 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 26 Jul 2016 14:55:51 -0700 Subject: ASoC: dapm: Add a dummy snd_pcm_runtime to avoid NULL pointer access The SND_SOC_DAPM_PRE_PMU case would call startup()/hw_params() that might access substream->runtime through other functions. For example: Unable to handle kernel NULL pointer dereference at virtual address [....] PC is at snd_pcm_hw_rule_add+0x24/0x1b0 LR is at snd_pcm_hw_constraint_list+0x20/0x28 [....] Process arecord (pid: 424, stack limit = 0xffffffc1ecaf0020) Call trace: [] snd_pcm_hw_rule_add+0x24/0x1b0 [] snd_pcm_hw_constraint_list+0x20/0x28 [] cs53l30_pcm_startup+0x24/0x30 [] snd_soc_dai_link_event+0x290/0x354 [] dapm_seq_check_event.isra.31+0x134/0x2c8 [] dapm_seq_run_coalesced+0x94/0x1c8 [] dapm_seq_run+0xa4/0x404 [] dapm_power_widgets+0x524/0x984 [] snd_soc_dapm_stream_event+0x8c/0xa8 [] soc_pcm_prepare+0x10c/0x1ec [] snd_pcm_do_prepare+0x1c/0x38 [] snd_pcm_action_single+0x40/0x88 [] snd_pcm_action_nonatomic+0x70/0x90 [] snd_pcm_common_ioctl1+0xb6c/0xdd8 [] snd_pcm_capture_ioctl1+0x200/0x334 [] snd_pcm_ioctl_compat+0x648/0x95c [] compat_SyS_ioctl+0xac/0xfc4 [] el0_svc_naked+0x24/0x28 ---[ end trace 0dc4f99c2759c35c ]--- So this patch adds a dummy runtime for the original dummy substream to merely avoid the NULL pointer access. Signed-off-by: Nicolin Chen Signed-off-by: Mark Brown diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 8698c26..d908ff8 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3493,6 +3493,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w, const struct snd_soc_pcm_stream *config = w->params + w->params_select; struct snd_pcm_substream substream; struct snd_pcm_hw_params *params = NULL; + struct snd_pcm_runtime *runtime = NULL; u64 fmt; int ret; @@ -3541,6 +3542,14 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w, memset(&substream, 0, sizeof(substream)); + /* Allocate a dummy snd_pcm_runtime for startup() and other ops() */ + runtime = kzalloc(sizeof(*runtime), GFP_KERNEL); + if (!runtime) { + ret = -ENOMEM; + goto out; + } + substream.runtime = runtime; + switch (event) { case SND_SOC_DAPM_PRE_PMU: substream.stream = SNDRV_PCM_STREAM_CAPTURE; @@ -3606,6 +3615,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w, } out: + kfree(runtime); kfree(params); return ret; } -- cgit v0.10.2 From 93ca33c99f22a0a096f83f19c9f887aeb67507a1 Mon Sep 17 00:00:00 2001 From: Hiroyuki Yokoyama Date: Mon, 25 Jul 2016 01:52:43 +0000 Subject: ASoC: rsnd: Fixup SRCm_IFSVR calculate method This patch fixes the calculation accuracy degradation of SRCm_IFSVR register value. Signed-off-by: Hiroyuki Yokoyama Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c index e39f916..969a516 100644 --- a/sound/soc/sh/rcar/src.c +++ b/sound/soc/sh/rcar/src.c @@ -226,8 +226,12 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, ifscr = 0; fsrate = 0; if (fin != fout) { + u64 n; + ifscr = 1; - fsrate = 0x0400000 / fout * fin; + n = (u64)0x0400000 * fin; + do_div(n, fout); + fsrate = n; } /* -- cgit v0.10.2 From 9b622e2bbcf049c82e2550d35fb54ac205965f50 Mon Sep 17 00:00:00 2001 From: Tomasz Majchrzak Date: Thu, 28 Jul 2016 10:28:25 +0200 Subject: raid10: increment write counter after bio is split md pending write counter must be incremented after bio is split, otherwise it gets decremented too many times in end bio callback and becomes negative. Signed-off-by: Tomasz Majchrzak Reviewed-by: Artur Paszkiewicz Signed-off-by: Shaohua Li diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ed29fc8..1a632a8 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1064,6 +1064,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio) int max_sectors; int sectors; + md_write_start(mddev, bio); + /* * Register the new request and wait if the reconstruction * thread has put up a bar for new requests. @@ -1445,8 +1447,6 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio) return; } - md_write_start(mddev, bio); - do { /* -- cgit v0.10.2 From 3980bd3b406addb327d858aebd19e229ea340b9a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 31 Jul 2016 23:38:36 -0400 Subject: ext4: reserve xattr index for the Hurd The Hurd is using inode fields which restricts it from using more advanced ext4 file system features, due to design choices made over a decade ago. By giving the Hurd an extended attribute index field we allow it to move the translator and author fields out of the core inode fields, and hopefully we can get rid of ugly hacks such as EXT4_OS_HURD and EXT4_MOUNT2_HURD_COMPAT somday. For more information please see: https://summerofcode.withgoogle.com/projects/#5869799859027968 Signed-off-by: Theodore Ts'o diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 69dd3e6..a92e783 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -24,6 +24,7 @@ #define EXT4_XATTR_INDEX_SYSTEM 7 #define EXT4_XATTR_INDEX_RICHACL 8 #define EXT4_XATTR_INDEX_ENCRYPTION 9 +#define EXT4_XATTR_INDEX_HURD 10 /* Reserved for Hurd */ struct ext4_xattr_header { __le32 h_magic; /* magic number for identification */ -- cgit v0.10.2 From 829fa70dddadf9dd041d62b82cd7cea63943899d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Aug 2016 00:51:02 -0400 Subject: ext4: validate that metadata blocks do not overlap superblock A number of fuzzing failures seem to be caused by allocation bitmaps or other metadata blocks being pointed at the superblock. This can cause kernel BUG or WARNings once the superblock is overwritten, so validate the group descriptor blocks to make sure this doesn't happen. Cc: stable@vger.kernel.org Signed-off-by: Theodore Ts'o diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c13a4e4..da3146e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2211,6 +2211,7 @@ void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group, /* Called at mount-time, super-block is locked */ static int ext4_check_descriptors(struct super_block *sb, + ext4_fsblk_t sb_block, ext4_group_t *first_not_zeroed) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -2241,6 +2242,11 @@ static int ext4_check_descriptors(struct super_block *sb, grp = i; block_bitmap = ext4_block_bitmap(sb, gdp); + if (block_bitmap == sb_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Block bitmap for group %u overlaps " + "superblock", i); + } if (block_bitmap < first_block || block_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Block bitmap for group %u not in group " @@ -2248,6 +2254,11 @@ static int ext4_check_descriptors(struct super_block *sb, return 0; } inode_bitmap = ext4_inode_bitmap(sb, gdp); + if (inode_bitmap == sb_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Inode bitmap for group %u overlaps " + "superblock", i); + } if (inode_bitmap < first_block || inode_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode bitmap for group %u not in group " @@ -2255,6 +2266,11 @@ static int ext4_check_descriptors(struct super_block *sb, return 0; } inode_table = ext4_inode_table(sb, gdp); + if (inode_table == sb_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Inode table for group %u overlaps " + "superblock", i); + } if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " @@ -3757,7 +3773,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount2; } } - if (!ext4_check_descriptors(sb, &first_not_zeroed)) { + if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); ret = -EFSCORRUPTED; goto failed_mount2; -- cgit v0.10.2 From 2392f7fd695246df4fb5f0b5fb88ce37cdb01764 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 26 Jul 2016 18:06:39 +0530 Subject: ASoC: Intel: Skylake: Check list empty while getting module info Module list can be NULL so check if the list is empty before accessing the list. Signed-off-by: Senthilnathan Veppur Signed-off-by: Vinod Koul Signed-off-by: Mark Brown diff --git a/sound/soc/intel/skylake/skl-sst-utils.c b/sound/soc/intel/skylake/skl-sst-utils.c index 25fcb79..ddcb52a 100644 --- a/sound/soc/intel/skylake/skl-sst-utils.c +++ b/sound/soc/intel/skylake/skl-sst-utils.c @@ -123,6 +123,11 @@ int snd_skl_get_module_info(struct skl_sst *ctx, u8 *uuid, uuid_mod = (uuid_le *)uuid; + if (list_empty(&ctx->uuid_list)) { + dev_err(ctx->dev, "Module list is empty\n"); + return -EINVAL; + } + list_for_each_entry(module, &ctx->uuid_list, list) { if (uuid_le_cmp(*uuid_mod, module->uuid) == 0) { dfw_config->module_id = module->id; -- cgit v0.10.2 From ff00d3b4e5e4395c825e8ec628f25932d812f31a Mon Sep 17 00:00:00 2001 From: ZhengYuan Liu Date: Thu, 28 Jul 2016 14:22:14 +0800 Subject: raid5: fix incorrectly counter of conf->empty_inactive_list_nr The counter conf->empty_inactive_list_nr is only used for determine if the raid5 is congested which is deal with in function raid5_congested(). It was increased in get_free_stripe() when conf->inactive_list got to be empty and decreased in release_inactive_stripe_list() when splice temp_inactive_list to conf->inactive_list. However, this may have a problem when raid5_get_active_stripe or stripe_add_to_batch_list was called, because these two functions may call list_del_init(&sh->lru) to delete sh from "conf->inactive_list + hash" which may cause "conf->inactive_list + hash" to be empty when atomic_inc_not_zero(&sh->count) got false. So a check should be done at these two point and increase empty_inactive_list_nr accordingly. Otherwise the counter may get to be negative number which would influence async readahead from VFS. Signed-off-by: ZhengYuan Liu Signed-off-by: Shaohua Li diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d189e89..e379b89 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -659,6 +659,7 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, { struct stripe_head *sh; int hash = stripe_hash_locks_hash(sector); + int inc_empty_inactive_list_flag; pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector); @@ -703,7 +704,12 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, atomic_inc(&conf->active_stripes); BUG_ON(list_empty(&sh->lru) && !test_bit(STRIPE_EXPANDING, &sh->state)); + inc_empty_inactive_list_flag = 0; + if (!list_empty(conf->inactive_list + hash)) + inc_empty_inactive_list_flag = 1; list_del_init(&sh->lru); + if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag) + atomic_inc(&conf->empty_inactive_list_nr); if (sh->group) { sh->group->stripes_cnt--; sh->group = NULL; @@ -762,6 +768,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh sector_t head_sector, tmp_sec; int hash; int dd_idx; + int inc_empty_inactive_list_flag; /* Don't cross chunks, so stripe pd_idx/qd_idx is the same */ tmp_sec = sh->sector; @@ -779,7 +786,12 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh atomic_inc(&conf->active_stripes); BUG_ON(list_empty(&head->lru) && !test_bit(STRIPE_EXPANDING, &head->state)); + inc_empty_inactive_list_flag = 0; + if (!list_empty(conf->inactive_list + hash)) + inc_empty_inactive_list_flag = 1; list_del_init(&head->lru); + if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag) + atomic_inc(&conf->empty_inactive_list_nr); if (head->group) { head->group->stripes_cnt--; head->group = NULL; -- cgit v0.10.2 From 680682d4d537565e2c358483e1feeca30a8cf3d4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 17 Jul 2016 19:55:27 +0100 Subject: cfg80211: fix missing break in NL8211_CHAN_WIDTH_80P80 case The switch on chandef->width is missing a break on the NL8211_CHAN_WIDTH_80P80 case; currently we get a WARN_ON when center_freq2 is non-zero because of the missing break. Signed-off-by: Colin Ian King Signed-off-by: Johannes Berg diff --git a/net/wireless/chan.c b/net/wireless/chan.c index da49c0b..bb3d64e 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -513,6 +513,7 @@ static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy, r = cfg80211_get_chans_dfs_available(wiphy, chandef->center_freq2, width); + break; default: WARN_ON(chandef->center_freq2); break; -- cgit v0.10.2 From 4e3f21bc7bfbdc4a348852e4da1540227e1b0171 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 11 Jul 2016 15:09:15 +0200 Subject: mac80211: fix check for buffered powersave frames with txq The logic was inverted here, set the bit if frames are pending. Fixes: ba8c3d6f16a1 ("mac80211: add an intermediate software queue implementation") Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 2e8a902..9dce3b1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1268,7 +1268,7 @@ static void sta_ps_start(struct sta_info *sta) for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); - if (!txqi->tin.backlog_packets) + if (txqi->tin.backlog_packets) set_bit(tid, &sta->txq_buffered_tids); else clear_bit(tid, &sta->txq_buffered_tids); -- cgit v0.10.2 From c37a54ac37e7fbf8abe2b7b886ac5f1b3e8f8d29 Mon Sep 17 00:00:00 2001 From: Maital Hahn Date: Wed, 13 Jul 2016 14:44:41 +0300 Subject: mac80211: mesh: flush stations before beacons are stopped Some drivers (e.g. wl18xx) expect that the last stage in the de-initialization process will be stopping the beacons, similar to AP flow. Update ieee80211_stop_mesh() flow accordingly. As peers can be removed dynamically, this would not impact other drivers. Tested also on Ralink RT3572 chipset. Signed-off-by: Maital Hahn Signed-off-by: Yaniv Machani Signed-off-by: Johannes Berg diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index c66411d..42120d9 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -881,20 +881,22 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) netif_carrier_off(sdata->dev); + /* flush STAs and mpaths on this iface */ + sta_info_flush(sdata); + mesh_path_flush_by_iface(sdata); + /* stop the beacon */ ifmsh->mesh_id_len = 0; sdata->vif.bss_conf.enable_beacon = false; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); + + /* remove beacon */ bcn = rcu_dereference_protected(ifmsh->beacon, lockdep_is_held(&sdata->wdev.mtx)); RCU_INIT_POINTER(ifmsh->beacon, NULL); kfree_rcu(bcn, rcu_head); - /* flush STAs and mpaths on this iface */ - sta_info_flush(sdata); - mesh_path_flush_by_iface(sdata); - /* free all potentially still buffered group-addressed frames */ local->total_ps_buffered -= skb_queue_len(&ifmsh->ps.bc_buf); skb_queue_purge(&ifmsh->ps.bc_buf); -- cgit v0.10.2 From 1f85e118c81d15aa9e002604dfb69c823d4aac16 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 3 Aug 2016 01:24:05 +0000 Subject: ASoC: simple-card-utils: add missing MODULE_xxx() simple-card-utils might be used as module, but MODULE_xxx() information was missed. This patch adds it. Otherwise, we will have below error, and can't use it. Specil thanks to Kevin. > insmod simple-card-utils.ko simple_card_utils: module license 'unspecified' taints kernel. Disabling lock debugging due to kernel taint simple_card_utils: Unknown symbol snd_soc_of_parse_daifmt (err 0) simple_card_utils: Unknown symbol snd_soc_of_parse_card_name (err 0) insmod: can't insert 'simple-card-utils.ko': \ unknown symbol in module, or unknown parameter Reported-by: Kevin Hilman Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown diff --git a/sound/soc/generic/Makefile b/sound/soc/generic/Makefile index 45602ca..2d53c8d 100644 --- a/sound/soc/generic/Makefile +++ b/sound/soc/generic/Makefile @@ -1,5 +1,5 @@ -obj-$(CONFIG_SND_SIMPLE_CARD_UTILS) := simple-card-utils.o - +snd-soc-simple-card-utils-objs := simple-card-utils.o snd-soc-simple-card-objs := simple-card.o -obj-$(CONFIG_SND_SIMPLE_CARD) += snd-soc-simple-card.o +obj-$(CONFIG_SND_SIMPLE_CARD_UTILS) += snd-soc-simple-card-utils.o +obj-$(CONFIG_SND_SIMPLE_CARD) += snd-soc-simple-card.o diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index d89a9a1..9599de6 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -7,6 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include #include #include @@ -95,3 +96,8 @@ int asoc_simple_card_parse_card_name(struct snd_soc_card *card, return 0; } EXPORT_SYMBOL_GPL(asoc_simple_card_parse_card_name); + +/* Module information */ +MODULE_AUTHOR("Kuninori Morimoto "); +MODULE_DESCRIPTION("ALSA SoC Simple Card Utils"); +MODULE_LICENSE("GPL v2"); -- cgit v0.10.2 From bce91f8a4247905b8c40a53f72c14db908cd0710 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 1 Aug 2016 19:36:07 -0700 Subject: openvswitch: Remove incorrect WARN_ONCE(). ovs_ct_find_existing() issues a warning if an existing conntrack entry classified as IP_CT_NEW is found, with the premise that this should not happen. However, a newly confirmed, non-expected conntrack entry remains IP_CT_NEW as long as no reply direction traffic is seen. This has resulted into somewhat confusing kernel log messages. This patch removes this check and warning. Fixes: 289f2253 ("openvswitch: Find existing conntrack entry after upcall.") Suggested-by: Joe Stringer Signed-off-by: Jarno Rajahalme Acked-by: Joe Stringer Signed-off-by: David S. Miller diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index c644c78..e054a74 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -433,7 +433,6 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; - enum ip_conntrack_info ctinfo; struct nf_conn *ct; unsigned int dataoff; u8 protonum; @@ -458,13 +457,8 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, ct = nf_ct_tuplehash_to_ctrack(h); - ctinfo = ovs_ct_get_info(h); - if (ctinfo == IP_CT_NEW) { - /* This should not happen. */ - WARN_ONCE(1, "ovs_ct_find_existing: new packet for %p\n", ct); - } skb->nfct = &ct->ct_general; - skb->nfctinfo = ctinfo; + skb->nfctinfo = ovs_ct_get_info(h); return ct; } -- cgit v0.10.2 From ea966cb6b9e11e87f7b146549aef8e13cad5c6ff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 2 Aug 2016 12:23:29 +0200 Subject: net: xgene: fix maybe-uninitialized variable Building with -Wmaybe-uninitialized shows a potential use of an uninitialized variable: drivers/net/ethernet/apm/xgene/xgene_enet_hw.c: In function 'xgene_enet_phy_connect': drivers/net/ethernet/apm/xgene/xgene_enet_hw.c:802:23: warning: 'phy_dev' may be used uninitialized in this function [-Wmaybe-uninitialized] Although the compiler correctly identified this based on the function, the current code is still safe as long dev->of_node is non-NULL for the case of CONFIG_ACPI=n, which is currently the case. The warning is now disabled by default, but still appears when building with W=1, and other build test tools should be able to detect it as well. Adding an #else clause here makes the code more robust and makes it clear to the compiler that this cannot happen. Signed-off-by: Arnd Bergmann Fixes: 8089a96f601b ("drivers: net: xgene: Add backward compatibility") Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index 37a0f46..18bb955 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -793,6 +793,8 @@ int xgene_enet_phy_connect(struct net_device *ndev) netdev_err(ndev, "Could not connect to PHY\n"); return -ENODEV; } +#else + return -ENODEV; #endif } -- cgit v0.10.2 From 1f415a74b0ca64b5bfacbb12d71ed2ec050a8cfb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 2 Aug 2016 16:12:14 +0100 Subject: bpf: fix method of PTR_TO_PACKET reg id generation Using per-register incrementing ID can lead to find_good_pkt_pointers() confusing registers which have completely different values. Consider example: 0: (bf) r6 = r1 1: (61) r8 = *(u32 *)(r6 +76) 2: (61) r0 = *(u32 *)(r6 +80) 3: (bf) r7 = r8 4: (07) r8 += 32 5: (2d) if r8 > r0 goto pc+9 R0=pkt_end R1=ctx R6=ctx R7=pkt(id=0,off=0,r=32) R8=pkt(id=0,off=32,r=32) R10=fp 6: (bf) r8 = r7 7: (bf) r9 = r7 8: (71) r1 = *(u8 *)(r7 +0) 9: (0f) r8 += r1 10: (71) r1 = *(u8 *)(r7 +1) 11: (0f) r9 += r1 12: (07) r8 += 32 13: (2d) if r8 > r0 goto pc+1 R0=pkt_end R1=inv56 R6=ctx R7=pkt(id=0,off=0,r=32) R8=pkt(id=1,off=32,r=32) R9=pkt(id=1,off=0,r=32) R10=fp 14: (71) r1 = *(u8 *)(r9 +16) 15: (b7) r7 = 0 16: (bf) r0 = r7 17: (95) exit We need to get a UNKNOWN_VALUE with imm to force id generation so lines 0-5 make r7 a valid packet pointer. We then read two different bytes from the packet and add them to copies of the constructed packet pointer. r8 (line 9) and r9 (line 11) will get the same id of 1, independently. When either of them is validated (line 13) - find_good_pkt_pointers() will also mark the other as safe. This leads to access on line 14 being mistakenly considered safe. Fixes: 969bf05eb3ce ("bpf: direct packet access") Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f72f23b..7094c69 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -194,6 +194,7 @@ struct verifier_env { struct verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ + u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; }; @@ -1301,7 +1302,7 @@ add_imm: /* dst_reg stays as pkt_ptr type and since some positive * integer value was added to the pointer, increment its 'id' */ - dst_reg->id++; + dst_reg->id = ++env->id_gen; /* something was added to pkt_ptr, set range and off to zero */ dst_reg->off = 0; -- cgit v0.10.2 From 087d7a8c917491e6e7feb707a858d624bf5b5f14 Mon Sep 17 00:00:00 2001 From: Satish Baddipadige Date: Wed, 3 Aug 2016 09:43:59 +0530 Subject: tg3: Fix for diasllow rx coalescing time to be 0 When the rx coalescing time is 0, interrupts are not generated from the controller and rx path hangs. To avoid this rx hang, updating the driver to not allow rx coalescing time to be 0. Signed-off-by: Satish Baddipadige Signed-off-by: Siva Reddy Kallam Signed-off-by: Michael Chan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index ff300f7..f3c6c91 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14014,6 +14014,7 @@ static int tg3_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) } if ((ec->rx_coalesce_usecs > MAX_RXCOL_TICKS) || + (!ec->rx_coalesce_usecs) || (ec->tx_coalesce_usecs > MAX_TXCOL_TICKS) || (ec->rx_max_coalesced_frames > MAX_RXMAX_FRAMES) || (ec->tx_max_coalesced_frames > MAX_TXMAX_FRAMES) || -- cgit v0.10.2 From 9ce6fd7a81e6f787756be2f4b85f4f7bb3658de3 Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Wed, 3 Aug 2016 09:44:00 +0530 Subject: tg3: Report the correct number of RSS queues through tg3_get_rxnfc This patch remove the wrong substraction from info->data in tg3_get_rxnfc function. Without this patch, the number of RSS queues reported is less by one. Reported-by: Michal Soltys Signed-off-by: Siva Reddy Kallam Signed-off-by: Michael Chan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index f3c6c91..6592612 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12552,10 +12552,6 @@ static int tg3_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, info->data = TG3_RSS_MAX_NUM_QS; } - /* The first interrupt vector only - * handles link interrupts. - */ - info->data -= 1; return 0; default: -- cgit v0.10.2 From 2b10d3ecf2dac737653828889ff85f614318f01a Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Wed, 3 Aug 2016 04:02:02 -0400 Subject: qlcnic: fix data structure corruption in async mbx command handling This patch fixes a data structure corruption bug in the SRIOV VF mailbox handler code. While handling mailbox commands from the atomic context, driver is accessing and updating qlcnic_async_work_list_struct entry fields in the async work list. These fields could be concurrently accessed by the work function resulting in data corruption. This patch restructures async mbx command handling by using a separate async command list instead of using a list of work_struct structures. A single work_struct is used to schedule and handle the async commands with proper locking mechanism. Signed-off-by: Rajesh Borundia Signed-off-by: Sony Chacko Signed-off-by: Manish Chopra Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h index 017d8c2c..24061b9 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h @@ -156,10 +156,8 @@ struct qlcnic_vf_info { spinlock_t vlan_list_lock; /* Lock for VLAN list */ }; -struct qlcnic_async_work_list { +struct qlcnic_async_cmd { struct list_head list; - struct work_struct work; - void *ptr; struct qlcnic_cmd_args *cmd; }; @@ -168,7 +166,10 @@ struct qlcnic_back_channel { struct workqueue_struct *bc_trans_wq; struct workqueue_struct *bc_async_wq; struct workqueue_struct *bc_flr_wq; - struct list_head async_list; + struct qlcnic_adapter *adapter; + struct list_head async_cmd_list; + struct work_struct vf_async_work; + spinlock_t queue_lock; /* async_cmd_list queue lock */ }; struct qlcnic_sriov { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index 7327b72..d710705 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -29,6 +29,7 @@ #define QLC_83XX_VF_RESET_FAIL_THRESH 8 #define QLC_BC_CMD_MAX_RETRY_CNT 5 +static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work); static void qlcnic_sriov_vf_free_mac_list(struct qlcnic_adapter *); static int qlcnic_sriov_alloc_bc_mbx_args(struct qlcnic_cmd_args *, u32); static void qlcnic_sriov_vf_poll_dev_state(struct work_struct *); @@ -177,7 +178,10 @@ int qlcnic_sriov_init(struct qlcnic_adapter *adapter, int num_vfs) } bc->bc_async_wq = wq; - INIT_LIST_HEAD(&bc->async_list); + INIT_LIST_HEAD(&bc->async_cmd_list); + INIT_WORK(&bc->vf_async_work, qlcnic_sriov_handle_async_issue_cmd); + spin_lock_init(&bc->queue_lock); + bc->adapter = adapter; for (i = 0; i < num_vfs; i++) { vf = &sriov->vf_info[i]; @@ -1517,17 +1521,21 @@ static void qlcnic_vf_add_mc_list(struct net_device *netdev, const u8 *mac, void qlcnic_sriov_cleanup_async_list(struct qlcnic_back_channel *bc) { - struct list_head *head = &bc->async_list; - struct qlcnic_async_work_list *entry; + struct list_head *head = &bc->async_cmd_list; + struct qlcnic_async_cmd *entry; flush_workqueue(bc->bc_async_wq); + cancel_work_sync(&bc->vf_async_work); + + spin_lock(&bc->queue_lock); while (!list_empty(head)) { - entry = list_entry(head->next, struct qlcnic_async_work_list, + entry = list_entry(head->next, struct qlcnic_async_cmd, list); - cancel_work_sync(&entry->work); list_del(&entry->list); + kfree(entry->cmd); kfree(entry); } + spin_unlock(&bc->queue_lock); } void qlcnic_sriov_vf_set_multi(struct net_device *netdev) @@ -1587,57 +1595,64 @@ void qlcnic_sriov_vf_set_multi(struct net_device *netdev) static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work) { - struct qlcnic_async_work_list *entry; - struct qlcnic_adapter *adapter; + struct qlcnic_async_cmd *entry, *tmp; + struct qlcnic_back_channel *bc; struct qlcnic_cmd_args *cmd; + struct list_head *head; + LIST_HEAD(del_list); + + bc = container_of(work, struct qlcnic_back_channel, vf_async_work); + head = &bc->async_cmd_list; + + spin_lock(&bc->queue_lock); + list_splice_init(head, &del_list); + spin_unlock(&bc->queue_lock); + + list_for_each_entry_safe(entry, tmp, &del_list, list) { + list_del(&entry->list); + cmd = entry->cmd; + __qlcnic_sriov_issue_cmd(bc->adapter, cmd); + kfree(entry); + } + + if (!list_empty(head)) + queue_work(bc->bc_async_wq, &bc->vf_async_work); - entry = container_of(work, struct qlcnic_async_work_list, work); - adapter = entry->ptr; - cmd = entry->cmd; - __qlcnic_sriov_issue_cmd(adapter, cmd); return; } -static struct qlcnic_async_work_list * -qlcnic_sriov_get_free_node_async_work(struct qlcnic_back_channel *bc) +static struct qlcnic_async_cmd * +qlcnic_sriov_alloc_async_cmd(struct qlcnic_back_channel *bc, + struct qlcnic_cmd_args *cmd) { - struct list_head *node; - struct qlcnic_async_work_list *entry = NULL; - u8 empty = 0; + struct qlcnic_async_cmd *entry = NULL; - list_for_each(node, &bc->async_list) { - entry = list_entry(node, struct qlcnic_async_work_list, list); - if (!work_pending(&entry->work)) { - empty = 1; - break; - } - } + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + return NULL; - if (!empty) { - entry = kzalloc(sizeof(struct qlcnic_async_work_list), - GFP_ATOMIC); - if (entry == NULL) - return NULL; - list_add_tail(&entry->list, &bc->async_list); - } + entry->cmd = cmd; + + spin_lock(&bc->queue_lock); + list_add_tail(&entry->list, &bc->async_cmd_list); + spin_unlock(&bc->queue_lock); return entry; } static void qlcnic_sriov_schedule_async_cmd(struct qlcnic_back_channel *bc, - work_func_t func, void *data, struct qlcnic_cmd_args *cmd) { - struct qlcnic_async_work_list *entry = NULL; + struct qlcnic_async_cmd *entry = NULL; - entry = qlcnic_sriov_get_free_node_async_work(bc); - if (!entry) + entry = qlcnic_sriov_alloc_async_cmd(bc, cmd); + if (!entry) { + qlcnic_free_mbx_args(cmd); + kfree(cmd); return; + } - entry->ptr = data; - entry->cmd = cmd; - INIT_WORK(&entry->work, func); - queue_work(bc->bc_async_wq, &entry->work); + queue_work(bc->bc_async_wq, &bc->vf_async_work); } static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *adapter, @@ -1649,8 +1664,8 @@ static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *adapter, if (adapter->need_fw_reset) return -EIO; - qlcnic_sriov_schedule_async_cmd(bc, qlcnic_sriov_handle_async_issue_cmd, - adapter, cmd); + qlcnic_sriov_schedule_async_cmd(bc, cmd); + return 0; } -- cgit v0.10.2 From fc4ca987f7cc0cb7ea8cb8bb673447939a84bb07 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Wed, 3 Aug 2016 04:02:03 -0400 Subject: qlcnic: fix napi budget alteration Driver modifies the supplied NAPI budget in qlcnic_83xx_msix_tx_poll() function. Instead, it should use the budget as it is. Signed-off-by: Manish Chopra Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 87c642d..fedd736 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -102,7 +102,6 @@ #define QLCNIC_RESPONSE_DESC 0x05 #define QLCNIC_LRO_DESC 0x12 -#define QLCNIC_TX_POLL_BUDGET 128 #define QLCNIC_TCP_HDR_SIZE 20 #define QLCNIC_TCP_TS_OPTION_SIZE 12 #define QLCNIC_FETCH_RING_ID(handle) ((handle) >> 63) @@ -2008,7 +2007,6 @@ static int qlcnic_83xx_msix_tx_poll(struct napi_struct *napi, int budget) struct qlcnic_host_tx_ring *tx_ring; struct qlcnic_adapter *adapter; - budget = QLCNIC_TX_POLL_BUDGET; tx_ring = container_of(napi, struct qlcnic_host_tx_ring, napi); adapter = tx_ring->adapter; work_done = qlcnic_process_cmd_ring(adapter, tx_ring, budget); -- cgit v0.10.2 From b8b2372de9cc00d5ed667c7b8db29b6cfbf037f5 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Wed, 3 Aug 2016 04:02:04 -0400 Subject: qlcnic: Update version to 5.3.65 Signed-off-by: Manish Chopra Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index fd973f4..49bad00 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -37,8 +37,8 @@ #define _QLCNIC_LINUX_MAJOR 5 #define _QLCNIC_LINUX_MINOR 3 -#define _QLCNIC_LINUX_SUBVERSION 64 -#define QLCNIC_LINUX_VERSIONID "5.3.64" +#define _QLCNIC_LINUX_SUBVERSION 65 +#define QLCNIC_LINUX_VERSIONID "5.3.65" #define QLCNIC_DRV_IDC_VER 0x01 #define QLCNIC_DRIVER_VERSION ((_QLCNIC_LINUX_MAJOR << 16) |\ (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION)) -- cgit v0.10.2 From 1bc610e7a17dcf5165f1ed4e0201ee080ba1a0df Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 4 Aug 2016 11:51:25 +0200 Subject: ASoC: samsung: Fix clock handling in S3C24XX_UDA134X card There is no "pclk" alias in the s3c2440 clk driver for "soc-audio" device so related clk_get() fails, which prevents any operation of the S3C24XX_UDA134X sound card. Instead we get the clock on behalf of the I2S device, i.e. we use the I2S block gate clock which has PCLK is its parent clock. Without this patch there is an error like: s3c24xx_uda134x_startup cannot get pclk ASoC: UDA134X startup failed: -2 Signed-off-by: Sylwester Nawrocki Signed-off-by: Mark Brown diff --git a/sound/soc/samsung/s3c24xx_uda134x.c b/sound/soc/samsung/s3c24xx_uda134x.c index 50849e1..92e88bc 100644 --- a/sound/soc/samsung/s3c24xx_uda134x.c +++ b/sound/soc/samsung/s3c24xx_uda134x.c @@ -58,10 +58,12 @@ static struct platform_device *s3c24xx_uda134x_snd_device; static int s3c24xx_uda134x_startup(struct snd_pcm_substream *substream) { - int ret = 0; + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *cpu_dai = rtd->cpu_dai; #ifdef ENFORCE_RATES struct snd_pcm_runtime *runtime = substream->runtime; #endif + int ret = 0; mutex_lock(&clk_lock); pr_debug("%s %d\n", __func__, clk_users); @@ -71,8 +73,7 @@ static int s3c24xx_uda134x_startup(struct snd_pcm_substream *substream) printk(KERN_ERR "%s cannot get xtal\n", __func__); ret = PTR_ERR(xtal); } else { - pclk = clk_get(&s3c24xx_uda134x_snd_device->dev, - "pclk"); + pclk = clk_get(cpu_dai->dev, "iis"); if (IS_ERR(pclk)) { printk(KERN_ERR "%s cannot get pclk\n", __func__); -- cgit v0.10.2 From ca6ac305f017472a172e53345264abdb495eba46 Mon Sep 17 00:00:00 2001 From: John Hsu Date: Thu, 4 Aug 2016 16:52:06 +0800 Subject: ASoC: nau8825: fix bug in playback when suspend In chromium, the following steps will make codec function fail. \1. plug in headphones, Play music \2. run "powerd_dbus_suspend" \3. resume from S3 After resume, the jack detection will restart and make configuration for the headset. Meanwhile, the playback prepares and starts to work. The two sequences will conflict and make wrong register configuration. Originally, the driver adds protection for the case when it finds the playback is active. But the "powerd_dbus_suspend" command will close the pcm stream before suspend. Therefore, the driver can't detect the playback after resume, and the protection not works. For the issue, the driver raises protection every time after resume. The protection will release after jack detection and configuration completes, and then the playback just will goes on. Signed-off-by: John Hsu Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index 5c9707a..20d2f15 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -212,31 +212,6 @@ static const unsigned short logtable[256] = { 0xfa2f, 0xfaea, 0xfba5, 0xfc60, 0xfd1a, 0xfdd4, 0xfe8e, 0xff47 }; -static struct snd_soc_dai *nau8825_get_codec_dai(struct nau8825 *nau8825) -{ - struct snd_soc_codec *codec = snd_soc_dapm_to_codec(nau8825->dapm); - struct snd_soc_component *component = &codec->component; - struct snd_soc_dai *codec_dai, *_dai; - - list_for_each_entry_safe(codec_dai, _dai, &component->dai_list, list) { - if (!strncmp(codec_dai->name, NUVOTON_CODEC_DAI, - strlen(NUVOTON_CODEC_DAI))) - return codec_dai; - } - return NULL; -} - -static bool nau8825_dai_is_active(struct nau8825 *nau8825) -{ - struct snd_soc_dai *codec_dai = nau8825_get_codec_dai(nau8825); - - if (codec_dai) { - if (codec_dai->playback_active || codec_dai->capture_active) - return true; - } - return false; -} - /** * nau8825_sema_acquire - acquire the semaphore of nau88l25 * @nau8825: component to register the codec private data with @@ -1205,6 +1180,8 @@ static int nau8825_hw_params(struct snd_pcm_substream *substream, struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec); unsigned int val_len = 0; + nau8825_sema_acquire(nau8825, 2 * HZ); + switch (params_width(params)) { case 16: val_len |= NAU8825_I2S_DL_16; @@ -1225,6 +1202,9 @@ static int nau8825_hw_params(struct snd_pcm_substream *substream, regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL1, NAU8825_I2S_DL_MASK, val_len); + /* Release the semaphone. */ + nau8825_sema_release(nau8825); + return 0; } @@ -1234,6 +1214,8 @@ static int nau8825_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec); unsigned int ctrl1_val = 0, ctrl2_val = 0; + nau8825_sema_acquire(nau8825, 2 * HZ); + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBM_CFM: ctrl2_val |= NAU8825_I2S_MS_MASTER; @@ -1282,6 +1264,9 @@ static int nau8825_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL2, NAU8825_I2S_MS_MASK, ctrl2_val); + /* Release the semaphone. */ + nau8825_sema_release(nau8825); + return 0; } @@ -2241,20 +2226,8 @@ static int __maybe_unused nau8825_resume(struct snd_soc_codec *codec) regcache_cache_only(nau8825->regmap, false); regcache_sync(nau8825->regmap); - if (nau8825_is_jack_inserted(nau8825->regmap)) { - /* If the jack is inserted, we need to check whether the play- - * back is active before suspend. If active, the driver has to - * raise the protection for cross talk function to avoid the - * playback recovers before cross talk process finish. Other- - * wise, the playback will be interfered by cross talk func- - * tion. It is better to apply hardware related parameters - * before starting playback or record. - */ - if (nau8825_dai_is_active(nau8825)) { - nau8825->xtalk_protect = true; - nau8825_sema_acquire(nau8825, 0); - } - } + nau8825->xtalk_protect = true; + nau8825_sema_acquire(nau8825, 0); enable_irq(nau8825->irq); return 0; -- cgit v0.10.2 From 06746c672cdd60715ee57ab1aced95a9e536fd4d Mon Sep 17 00:00:00 2001 From: John Hsu Date: Thu, 4 Aug 2016 16:52:07 +0800 Subject: ASoC: nau8825: fix static check error about semaphone control The patch is to fix the static check error as the following. The patch commit b50455fab459 ("ASoC: nau8825: cross talk suppression measurement function") from Jun 7, 2016, leads to the following static checker warning: sound/soc/codecs/nau8825.c:265 nau8825_sema_acquire() warn: 'sem:&nau8825->xtalk_sem' is sometimes locked here and sometimes unlocked. The semaphone acquire function has return value, and some callers can do error handling when lock fails. Signed-off-by: John Hsu Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index 20d2f15..2e59a85 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -225,19 +225,26 @@ static const unsigned short logtable[256] = { * Acquires the semaphore without jiffies. If no more tasks are allowed * to acquire the semaphore, calling this function will put the task to * sleep until the semaphore is released. - * It returns if the semaphore was acquired. + * If the semaphore is not released within the specified number of jiffies, + * this function returns -ETIME. + * If the sleep is interrupted by a signal, this function will return -EINTR. + * It returns 0 if the semaphore was acquired successfully. */ -static void nau8825_sema_acquire(struct nau8825 *nau8825, long timeout) +static int nau8825_sema_acquire(struct nau8825 *nau8825, long timeout) { int ret; - if (timeout) + if (timeout) { ret = down_timeout(&nau8825->xtalk_sem, timeout); - else + if (ret < 0) + dev_warn(nau8825->dev, "Acquire semaphone timeout\n"); + } else { ret = down_interruptible(&nau8825->xtalk_sem); + if (ret < 0) + dev_warn(nau8825->dev, "Acquire semaphone fail\n"); + } - if (ret < 0) - dev_warn(nau8825->dev, "Acquire semaphone fail\n"); + return ret; } /** @@ -1596,8 +1603,11 @@ static irqreturn_t nau8825_interrupt(int irq, void *data) * cess and restore changes if process * is ongoing when ejection. */ + int ret; nau8825->xtalk_protect = true; - nau8825_sema_acquire(nau8825, 0); + ret = nau8825_sema_acquire(nau8825, 0); + if (ret < 0) + nau8825->xtalk_protect = false; } /* Startup cross talk detection process */ nau8825->xtalk_state = NAU8825_XTALK_PREPARE; @@ -2223,11 +2233,14 @@ static int __maybe_unused nau8825_suspend(struct snd_soc_codec *codec) static int __maybe_unused nau8825_resume(struct snd_soc_codec *codec) { struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec); + int ret; regcache_cache_only(nau8825->regmap, false); regcache_sync(nau8825->regmap); nau8825->xtalk_protect = true; - nau8825_sema_acquire(nau8825, 0); + ret = nau8825_sema_acquire(nau8825, 0); + if (ret < 0) + nau8825->xtalk_protect = false; enable_irq(nau8825->irq); return 0; -- cgit v0.10.2 From 5d764912a0ee6db83e962c1501b5b9e58ba14e15 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Thu, 4 Aug 2016 15:35:37 +0100 Subject: ASoC: da7213: Default to 64 BCLKs per WCLK to support all formats Previously code defaulted to 32 BCLKS per WCLK which meant 24 and 32 bit DAI formats would not work properly. This patch fixes the issue by defaulting to 64 BCLKs per WCLK. Signed-off-by: Adam Thomson Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c index e5527bc..bcf1834 100644 --- a/sound/soc/codecs/da7213.c +++ b/sound/soc/codecs/da7213.c @@ -1247,8 +1247,8 @@ static int da7213_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) return -EINVAL; } - /* By default only 32 BCLK per WCLK is supported */ - dai_clk_mode |= DA7213_DAI_BCLKS_PER_WCLK_32; + /* By default only 64 BCLK per WCLK is supported */ + dai_clk_mode |= DA7213_DAI_BCLKS_PER_WCLK_64; snd_soc_write(codec, DA7213_DAI_CLK_MODE, dai_clk_mode); snd_soc_update_bits(codec, DA7213_DAI_CTRL, DA7213_DAI_FORMAT_MASK, -- cgit v0.10.2 From ea0a95d7f162bfa1c9df74471f0064f71cdf80ea Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 25 Jul 2016 07:00:33 +0000 Subject: fcoe: Use kfree_skb() instead of kfree() Use kfree_skb() instead of kfree() to free sk_buff. Signed-off-by: Wei Yongjun Acked-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index a569c65..dcf3653 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -2923,7 +2923,7 @@ static int fcoe_ctlr_vlan_recv(struct fcoe_ctlr *fip, struct sk_buff *skb) mutex_unlock(&fip->ctlr_mutex); drop: - kfree(skb); + kfree_skb(skb); return rc; } -- cgit v0.10.2 From 6b07d9ca9b5363dda959b9582a3fc9c0b89ef3b5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 2 Aug 2016 11:13:41 +0200 Subject: mac80211: fix purging multicast PS buffer queue The code currently assumes that buffered multicast PS frames don't have a pending ACK frame for tx status reporting. However, hostapd sends a broadcast deauth frame on teardown for which tx status is requested. This can lead to the "Have pending ack frames" warning on module reload. Fix this by using ieee80211_free_txskb/ieee80211_purge_tx_queue. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 47e99ab8..543b1d4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -869,7 +869,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) /* free all potentially still buffered bcast frames */ local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf); - skb_queue_purge(&sdata->u.ap.ps.bc_buf); + ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf); mutex_lock(&local->mtx); ieee80211_vif_copy_chanctx_to_vlans(sdata, true); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 91461c4..5023966 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -368,7 +368,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) skb = skb_dequeue(&ps->bc_buf); if (skb) { purged++; - dev_kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); } total += skb_queue_len(&ps->bc_buf); } @@ -451,7 +451,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) if (skb_queue_len(&ps->bc_buf) >= AP_MAX_BC_BUFFER) { ps_dbg(tx->sdata, "BC TX buffer full - dropping the oldest frame\n"); - dev_kfree_skb(skb_dequeue(&ps->bc_buf)); + ieee80211_free_txskb(&tx->local->hw, skb_dequeue(&ps->bc_buf)); } else tx->local->total_ps_buffered++; @@ -4275,7 +4275,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev); if (!ieee80211_tx_prepare(sdata, &tx, NULL, skb)) break; - dev_kfree_skb_any(skb); + ieee80211_free_txskb(hw, skb); } info = IEEE80211_SKB_CB(skb); -- cgit v0.10.2 From 71f2c3470fca51be27f6fc5975675f7e1c3b91e5 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Tue, 2 Aug 2016 17:16:57 +0900 Subject: mac80211: End the MPSP even if EOSP frame was not acked If QoS frame with EOSP (end of service period) subfield=1 sent by local peer was not acked by remote peer, local peer did not end the MPSP. This prevents local peer from going to DOZE state. And if the remote peer goes away without closing connection, local peer continues AWAKE state and wastes battery. Signed-off-by: Masashi Honma Acked-by: Bob Copeland Signed-off-by: Johannes Berg diff --git a/net/mac80211/status.c b/net/mac80211/status.c index c6d5c72..a2a6826 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -771,6 +771,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) clear_sta_flag(sta, WLAN_STA_SP); acked = !!(info->flags & IEEE80211_TX_STAT_ACK); + + /* mesh Peer Service Period support */ + if (ieee80211_vif_is_mesh(&sta->sdata->vif) && + ieee80211_is_data_qos(fc)) + ieee80211_mpsp_trigger_process( + ieee80211_get_qos_ctl(hdr), sta, true, acked); + if (!acked && test_sta_flag(sta, WLAN_STA_PS_STA)) { /* * The STA is in power save mode, so assume @@ -781,13 +788,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) return; } - /* mesh Peer Service Period support */ - if (ieee80211_vif_is_mesh(&sta->sdata->vif) && - ieee80211_is_data_qos(fc)) - ieee80211_mpsp_trigger_process( - ieee80211_get_qos_ctl(hdr), - sta, true, acked); - if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) && (ieee80211_is_data(hdr->frame_control)) && (rates_idx != -1)) -- cgit v0.10.2 From 9757235f451c27deaa88925399f070ff6fcea832 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Wed, 3 Aug 2016 10:07:44 +0900 Subject: nl80211: correct checks for NL80211_MESHCONF_HT_OPMODE value Previously, NL80211_MESHCONF_HT_OPMODE validation rejected correct flag combinations, e.g. IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED | IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT. Doing just a range-check allows setting flags that don't exist (0x8) and invalid flag combinations. Implements some checks based on IEEE 802.11 2012 8.4.2.59 "HT Operation element". Signed-off-by: Masashi Honma [reword commit message, simplify a bit] Signed-off-by: Johannes Berg diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 46417f9..f02653a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5380,6 +5380,7 @@ static int nl80211_parse_mesh_config(struct genl_info *info, { struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1]; u32 mask = 0; + u16 ht_opmode; #define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, min, max, mask, attr, fn) \ do { \ @@ -5471,9 +5472,36 @@ do { \ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0, mask, NL80211_MESHCONF_RSSI_THRESHOLD, nl80211_check_s32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16, - mask, NL80211_MESHCONF_HT_OPMODE, - nl80211_check_u16); + /* + * Check HT operation mode based on + * IEEE 802.11 2012 8.4.2.59 HT Operation element. + */ + if (tb[NL80211_MESHCONF_HT_OPMODE]) { + ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]); + + if (ht_opmode & ~(IEEE80211_HT_OP_MODE_PROTECTION | + IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT | + IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) + return -EINVAL; + + if ((ht_opmode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) && + (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) + return -EINVAL; + + switch (ht_opmode & IEEE80211_HT_OP_MODE_PROTECTION) { + case IEEE80211_HT_OP_MODE_PROTECTION_NONE: + case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ: + if (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT) + return -EINVAL; + break; + case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER: + case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED: + if (!(ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) + return -EINVAL; + break; + } + cfg->ht_opmode = ht_opmode; + } FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout, 1, 65535, mask, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, -- cgit v0.10.2 From 2439ca0402091badb24415e1b073ba12b34ba423 Mon Sep 17 00:00:00 2001 From: Maxim Altshul Date: Thu, 4 Aug 2016 15:43:04 +0300 Subject: mac80211: Add ieee80211_hw pointer to get_expected_throughput The variable is added to allow the driver an easy access to it's own hw->priv when the op is invoked. This fixes a crash in wlcore because it was relying on a station pointer that wasn't initialized yet. It's the wrong way to fix the crash, but it solves the problem for now and it does make sense to have the hw pointer here. Signed-off-by: Maxim Altshul [rewrite commit message, fix indentation] Signed-off-by: Johannes Berg diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 1d68916..9e1f2d9 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -5700,10 +5700,11 @@ out: mutex_unlock(&wl->mutex); } -static u32 wlcore_op_get_expected_throughput(struct ieee80211_sta *sta) +static u32 wlcore_op_get_expected_throughput(struct ieee80211_hw *hw, + struct ieee80211_sta *sta) { struct wl1271_station *wl_sta = (struct wl1271_station *)sta->drv_priv; - struct wl1271 *wl = wl_sta->wl; + struct wl1271 *wl = hw->priv; u8 hlid = wl_sta->hlid; /* return in units of Kbps */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b4faadb..cca510a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3620,7 +3620,8 @@ struct ieee80211_ops { int (*join_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); void (*leave_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); - u32 (*get_expected_throughput)(struct ieee80211_sta *sta); + u32 (*get_expected_throughput)(struct ieee80211_hw *hw, + struct ieee80211_sta *sta); int (*get_txpower)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, int *dbm); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 184473c..ba5fc1f 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1094,7 +1094,7 @@ static inline u32 drv_get_expected_throughput(struct ieee80211_local *local, trace_drv_get_expected_throughput(sta); if (local->ops->get_expected_throughput) - ret = local->ops->get_expected_throughput(sta); + ret = local->ops->get_expected_throughput(&local->hw, sta); trace_drv_return_u32(local, ret); return ret; -- cgit v0.10.2 From 5ef9f289c4e698054e5687edb54f0da3cdc9173a Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Wed, 3 Aug 2016 15:44:57 +1000 Subject: OVS: Ignore negative headroom value net_device->ndo_set_rx_headroom (introduced in 871b642adebe300be2e50aa5f65a418510f636ec) says "Setting a negtaive value reset the rx headroom to the default value". It seems that the OVS implementation in 3a927bc7cf9d0fbe8f4a8189dd5f8440228f64e7 overlooked this and sets dev->needed_headroom unconditionally. This doesn't have an immediate effect, but can mess up later LL_RESERVED_SPACE calculations, such as done in net/ipv6/mcast.c:mld_newpack. For reference, this issue was found from a skb_panic raised there after the length calculations had given the wrong result. Note the other current users of this interface (drivers/net/tun.c:tun_set_headroom and drivers/net/veth.c:veth_set_rx_headroom) are both checking this correctly thus need no modification. Thanks to Ben for some pointers from the crash dumps! Cc: Benjamin Poirier Cc: Paolo Abeni Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1361414 Signed-off-by: Ian Wienand Signed-off-by: David S. Miller diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 434e04c..95c3614 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -140,7 +140,7 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) static void internal_set_rx_headroom(struct net_device *dev, int new_hr) { - dev->needed_headroom = new_hr; + dev->needed_headroom = new_hr < 0 ? 0 : new_hr; } static const struct net_device_ops internal_dev_netdev_ops = { -- cgit v0.10.2 From 54447f1ad73414ebb052e9a33d079cabed3a03e8 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 3 Aug 2016 10:58:35 +0000 Subject: net: arc_emac: add missing of_node_put() in arc_emac_probe() commit a94efbd7cc45 ("ethernet: arc: emac_main: add missing of_node_put after calling of_parse_phandle") added missing of_node_put after calling of_parse_phandle, but missing the devm_ioremap_resource() error handling case. Signed-off-by: Wei Yongjun Reviewed-by: Peter Chen Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 4bff0f3..b0da969 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -771,8 +771,10 @@ int arc_emac_probe(struct net_device *ndev, int interface) priv->dev = dev; priv->regs = devm_ioremap_resource(dev, &res_regs); - if (IS_ERR(priv->regs)) - return PTR_ERR(priv->regs); + if (IS_ERR(priv->regs)) { + err = PTR_ERR(priv->regs); + goto out_put_node; + } dev_dbg(dev, "Registers base address is 0x%p\n", priv->regs); -- cgit v0.10.2 From 372ee16386bbf6dc5eeb0387e1ede963debba82a Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 3 Aug 2016 14:11:40 +0100 Subject: rxrpc: Fix races between skb free, ACK generation and replying Inside the kafs filesystem it is possible to occasionally have a call processed and terminated before we've had a chance to check whether we need to clean up the rx queue for that call because afs_send_simple_reply() ends the call when it is done, but this is done in a workqueue item that might happen to run to completion before afs_deliver_to_call() completes. Further, it is possible for rxrpc_kernel_send_data() to be called to send a reply before the last request-phase data skb is released. The rxrpc skb destructor is where the ACK processing is done and the call state is advanced upon release of the last skb. ACK generation is also deferred to a work item because it's possible that the skb destructor is not called in a context where kernel_sendmsg() can be invoked. To this end, the following changes are made: (1) kernel_rxrpc_data_consumed() is added. This should be called whenever an skb is emptied so as to crank the ACK and call states. This does not release the skb, however. kernel_rxrpc_free_skb() must now be called to achieve that. These together replace rxrpc_kernel_data_delivered(). (2) kernel_rxrpc_data_consumed() is wrapped by afs_data_consumed(). This makes afs_deliver_to_call() easier to work as the skb can simply be discarded unconditionally here without trying to work out what the return value of the ->deliver() function means. The ->deliver() functions can, via afs_data_complete(), afs_transfer_reply() and afs_extract_data() mark that an skb has been consumed (thereby cranking the state) without the need to conditionally free the skb to make sure the state is correct on an incoming call for when the call processor tries to send the reply. (3) rxrpc_recvmsg() now has to call kernel_rxrpc_data_consumed() when it has finished with a packet and MSG_PEEK isn't set. (4) rxrpc_packet_destructor() no longer calls rxrpc_hard_ACK_data(). Because of this, we no longer need to clear the destructor and put the call before we free the skb in cases where we don't want the ACK/call state to be cranked. (5) The ->deliver() call-type callbacks are made to return -EAGAIN rather than 0 if they expect more data (afs_extract_data() returns -EAGAIN to the delivery function already), and the caller is now responsible for producing an abort if that was the last packet. (6) There are many bits of unmarshalling code where: ret = afs_extract_data(call, skb, last, ...); switch (ret) { case 0: break; case -EAGAIN: return 0; default: return ret; } is to be found. As -EAGAIN can now be passed back to the caller, we now just return if ret < 0: ret = afs_extract_data(call, skb, last, ...); if (ret < 0) return ret; (7) Checks for trailing data and empty final data packets has been consolidated as afs_data_complete(). So: if (skb->len > 0) return -EBADMSG; if (!last) return 0; becomes: ret = afs_data_complete(call, skb, last); if (ret < 0) return ret; (8) afs_transfer_reply() now checks the amount of data it has against the amount of data desired and the amount of data in the skb and returns an error to induce an abort if we don't get exactly what we want. Without these changes, the following oops can occasionally be observed, particularly if some printks are inserted into the delivery path: general protection fault: 0000 [#1] SMP Modules linked in: kafs(E) af_rxrpc(E) [last unloaded: af_rxrpc] CPU: 0 PID: 1305 Comm: kworker/u8:3 Tainted: G E 4.7.0-fsdevel+ #1303 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Workqueue: kafsd afs_async_workfn [kafs] task: ffff88040be041c0 ti: ffff88040c070000 task.ti: ffff88040c070000 RIP: 0010:[] [] __lock_acquire+0xcf/0x15a1 RSP: 0018:ffff88040c073bc0 EFLAGS: 00010002 RAX: 6b6b6b6b6b6b6b6b RBX: 0000000000000000 RCX: ffff88040d29a710 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88040d29a710 RBP: ffff88040c073c70 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff88040be041c0 R15: ffffffff814c928f FS: 0000000000000000(0000) GS:ffff88041fa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa4595f4750 CR3: 0000000001c14000 CR4: 00000000001406f0 Stack: 0000000000000006 000000000be04930 0000000000000000 ffff880400000000 ffff880400000000 ffffffff8108f847 ffff88040be041c0 ffffffff81050446 ffff8803fc08a920 ffff8803fc08a958 ffff88040be041c0 ffff88040c073c38 Call Trace: [] ? mark_held_locks+0x5e/0x74 [] ? __local_bh_enable_ip+0x9b/0xa1 [] ? trace_hardirqs_on_caller+0x16d/0x189 [] lock_acquire+0x122/0x1b6 [] ? lock_acquire+0x122/0x1b6 [] ? skb_dequeue+0x18/0x61 [] _raw_spin_lock_irqsave+0x35/0x49 [] ? skb_dequeue+0x18/0x61 [] skb_dequeue+0x18/0x61 [] afs_deliver_to_call+0x344/0x39d [kafs] [] afs_process_async_call+0x4c/0xd5 [kafs] [] afs_async_workfn+0xe/0x10 [kafs] [] process_one_work+0x29d/0x57c [] worker_thread+0x24a/0x385 [] ? rescuer_thread+0x2d0/0x2d0 [] kthread+0xf3/0xfb [] ret_from_fork+0x1f/0x40 [] ? kthread_create_on_node+0x1cf/0x1cf Signed-off-by: David Howells Signed-off-by: David S. Miller diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 16a924c..70c926a 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -790,13 +790,12 @@ The kernel interface functions are as follows: Data messages can have their contents extracted with the usual bunch of socket buffer manipulation functions. A data message can be determined to be the last one in a sequence with rxrpc_kernel_is_data_last(). When a - data message has been used up, rxrpc_kernel_data_delivered() should be - called on it.. + data message has been used up, rxrpc_kernel_data_consumed() should be + called on it. - Non-data messages should be handled to rxrpc_kernel_free_skb() to dispose - of. It is possible to get extra refs on all types of message for later - freeing, but this may pin the state of a call until the message is finally - freed. + Messages should be handled to rxrpc_kernel_free_skb() to dispose of. It + is possible to get extra refs on all types of message for later freeing, + but this may pin the state of a call until the message is finally freed. (*) Accept an incoming call. @@ -821,12 +820,14 @@ The kernel interface functions are as follows: Other errors may be returned if the call had been aborted (-ECONNABORTED) or had timed out (-ETIME). - (*) Record the delivery of a data message and free it. + (*) Record the delivery of a data message. - void rxrpc_kernel_data_delivered(struct sk_buff *skb); + void rxrpc_kernel_data_consumed(struct rxrpc_call *call, + struct sk_buff *skb); - This is used to record a data message as having been delivered and to - update the ACK state for the call. The socket buffer will be freed. + This is used to record a data message as having been consumed and to + update the ACK state for the call. The message must still be passed to + rxrpc_kernel_free_skb() for disposal by the caller. (*) Free a message. diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 4b0eff6..85737e9 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -189,11 +189,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, case 1: _debug("extract FID count"); ret = afs_extract_data(call, skb, last, &call->tmp, 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->count = ntohl(call->tmp); _debug("FID count: %u", call->count); @@ -210,11 +207,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, _debug("extract FID array"); ret = afs_extract_data(call, skb, last, call->buffer, call->count * 3 * 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; _debug("unmarshall FID array"); call->request = kcalloc(call->count, @@ -239,11 +233,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, case 3: _debug("extract CB count"); ret = afs_extract_data(call, skb, last, &call->tmp, 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; tmp = ntohl(call->tmp); _debug("CB count: %u", tmp); @@ -258,11 +249,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, _debug("extract CB array"); ret = afs_extract_data(call, skb, last, call->request, call->count * 3 * 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; _debug("unmarshall CB array"); cb = call->request; @@ -278,9 +266,9 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, call->unmarshall++; case 5: - _debug("trailer"); - if (skb->len != 0) - return -EBADMSG; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; /* Record that the message was unmarshalled successfully so * that the call destructor can know do the callback breaking @@ -294,8 +282,6 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, break; } - if (!last) - return 0; call->state = AFS_CALL_REPLYING; @@ -335,13 +321,13 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, { struct afs_server *server; struct in_addr addr; + int ret; _enter(",{%u},%d", skb->len, last); - if (skb->len > 0) - return -EBADMSG; - if (!last) - return 0; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; /* no unmarshalling required */ call->state = AFS_CALL_REPLYING; @@ -371,8 +357,10 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call, _enter(",{%u},%d", skb->len, last); + /* There are some arguments that we ignore */ + afs_data_consumed(call, skb); if (!last) - return 0; + return -EAGAIN; /* no unmarshalling required */ call->state = AFS_CALL_REPLYING; @@ -408,12 +396,13 @@ static void SRXAFSCB_Probe(struct work_struct *work) static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb, bool last) { + int ret; + _enter(",{%u},%d", skb->len, last); - if (skb->len > 0) - return -EBADMSG; - if (!last) - return 0; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; /* no unmarshalling required */ call->state = AFS_CALL_REPLYING; @@ -460,10 +449,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - if (skb->len > 0) - return -EBADMSG; - if (!last) - return 0; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; switch (call->unmarshall) { case 0: @@ -509,8 +497,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, break; } - if (!last) - return 0; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; call->state = AFS_CALL_REPLYING; @@ -588,12 +577,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call, struct sk_buff *skb, bool last) { + int ret; + _enter(",{%u},%d", skb->len, last); - if (skb->len > 0) - return -EBADMSG; - if (!last) - return 0; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; /* no unmarshalling required */ call->state = AFS_CALL_REPLYING; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index c2e930e..9312b92 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -240,15 +240,13 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call, { struct afs_vnode *vnode = call->reply; const __be32 *bp; + int ret; _enter(",,%u", last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -335,11 +333,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, case 1: _debug("extract data length (MSW)"); ret = afs_extract_data(call, skb, last, &call->tmp, 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->count = ntohl(call->tmp); _debug("DATA length MSW: %u", call->count); @@ -353,11 +348,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, case 2: _debug("extract data length"); ret = afs_extract_data(call, skb, last, &call->tmp, 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->count = ntohl(call->tmp); _debug("DATA length: %u", call->count); @@ -375,11 +367,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, ret = afs_extract_data(call, skb, last, buffer, call->count); kunmap_atomic(buffer); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; } call->offset = 0; @@ -389,11 +378,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, case 4: ret = afs_extract_data(call, skb, last, call->buffer, (21 + 3 + 6) * 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); @@ -405,15 +391,12 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, call->unmarshall++; case 5: - _debug("trailer"); - if (skb->len != 0) - return -EBADMSG; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; break; } - if (!last) - return 0; - if (call->count < PAGE_SIZE) { _debug("clear"); page = call->reply3; @@ -537,9 +520,8 @@ static int afs_deliver_fs_give_up_callbacks(struct afs_call *call, { _enter(",{%u},%d", skb->len, last); - if (skb->len > 0) - return -EBADMSG; /* shouldn't be any reply data */ - return 0; + /* shouldn't be any reply data */ + return afs_data_complete(call, skb, last); } /* @@ -622,15 +604,13 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call, { struct afs_vnode *vnode = call->reply; const __be32 *bp; + int ret; _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -721,15 +701,13 @@ static int afs_deliver_fs_remove(struct afs_call *call, { struct afs_vnode *vnode = call->reply; const __be32 *bp; + int ret; _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -804,15 +782,13 @@ static int afs_deliver_fs_link(struct afs_call *call, { struct afs_vnode *dvnode = call->reply, *vnode = call->reply2; const __be32 *bp; + int ret; _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -892,15 +868,13 @@ static int afs_deliver_fs_symlink(struct afs_call *call, { struct afs_vnode *vnode = call->reply; const __be32 *bp; + int ret; _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -999,15 +973,13 @@ static int afs_deliver_fs_rename(struct afs_call *call, { struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2; const __be32 *bp; + int ret; _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -1105,20 +1077,13 @@ static int afs_deliver_fs_store_data(struct afs_call *call, { struct afs_vnode *vnode = call->reply; const __be32 *bp; + int ret; _enter(",,%u", last); - afs_transfer_reply(call, skb); - if (!last) { - _leave(" = 0 [more]"); - return 0; - } - - if (call->reply_size != call->reply_max) { - _leave(" = -EBADMSG [%u != %u]", - call->reply_size, call->reply_max); - return -EBADMSG; - } + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -1292,20 +1257,13 @@ static int afs_deliver_fs_store_status(struct afs_call *call, afs_dataversion_t *store_version; struct afs_vnode *vnode = call->reply; const __be32 *bp; + int ret; _enter(",,%u", last); - afs_transfer_reply(call, skb); - if (!last) { - _leave(" = 0 [more]"); - return 0; - } - - if (call->reply_size != call->reply_max) { - _leave(" = -EBADMSG [%u != %u]", - call->reply_size, call->reply_max); - return -EBADMSG; - } + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ store_version = NULL; @@ -1504,11 +1462,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, _debug("extract status"); ret = afs_extract_data(call, skb, last, call->buffer, 12 * 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; bp = call->buffer; xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2); @@ -1518,11 +1473,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the volume name length */ case 2: ret = afs_extract_data(call, skb, last, &call->tmp, 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->count = ntohl(call->tmp); _debug("volname length: %u", call->count); @@ -1537,11 +1489,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, if (call->count > 0) { ret = afs_extract_data(call, skb, last, call->reply3, call->count); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; } p = call->reply3; @@ -1561,11 +1510,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 4: ret = afs_extract_data(call, skb, last, call->buffer, call->count); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->offset = 0; call->unmarshall++; @@ -1574,11 +1520,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the offline message length */ case 5: ret = afs_extract_data(call, skb, last, &call->tmp, 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->count = ntohl(call->tmp); _debug("offline msg length: %u", call->count); @@ -1593,11 +1536,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, if (call->count > 0) { ret = afs_extract_data(call, skb, last, call->reply3, call->count); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; } p = call->reply3; @@ -1617,11 +1557,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 7: ret = afs_extract_data(call, skb, last, call->buffer, call->count); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->offset = 0; call->unmarshall++; @@ -1630,11 +1567,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the message of the day length */ case 8: ret = afs_extract_data(call, skb, last, &call->tmp, 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->count = ntohl(call->tmp); _debug("motd length: %u", call->count); @@ -1649,11 +1583,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, if (call->count > 0) { ret = afs_extract_data(call, skb, last, call->reply3, call->count); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; } p = call->reply3; @@ -1673,26 +1604,20 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 10: ret = afs_extract_data(call, skb, last, call->buffer, call->count); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->offset = 0; call->unmarshall++; no_motd_padding: case 11: - _debug("trailer %d", skb->len); - if (skb->len != 0) - return -EBADMSG; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; break; } - if (!last) - return 0; - _leave(" = 0 [done]"); return 0; } @@ -1764,15 +1689,13 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call, struct sk_buff *skb, bool last) { const __be32 *bp; + int ret; _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 71d5982..df976b2 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -609,17 +609,29 @@ extern void afs_proc_cell_remove(struct afs_cell *); */ extern int afs_open_socket(void); extern void afs_close_socket(void); +extern void afs_data_consumed(struct afs_call *, struct sk_buff *); extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, const struct afs_wait_mode *); extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *, size_t, size_t); extern void afs_flat_call_destructor(struct afs_call *); -extern void afs_transfer_reply(struct afs_call *, struct sk_buff *); +extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool); extern void afs_send_empty_reply(struct afs_call *); extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *, size_t); +static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb, + bool last) +{ + if (skb->len > 0) + return -EBADMSG; + afs_data_consumed(call, skb); + if (!last) + return -EAGAIN; + return 0; +} + /* * security.c */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 4832de84..14d04c8 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -150,10 +150,9 @@ void afs_close_socket(void) } /* - * note that the data in a socket buffer is now delivered and that the buffer - * should be freed + * Note that the data in a socket buffer is now consumed. */ -static void afs_data_delivered(struct sk_buff *skb) +void afs_data_consumed(struct afs_call *call, struct sk_buff *skb) { if (!skb) { _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs)); @@ -161,9 +160,7 @@ static void afs_data_delivered(struct sk_buff *skb) } else { _debug("DLVR %p{%u} [%d]", skb, skb->mark, atomic_read(&afs_outstanding_skbs)); - if (atomic_dec_return(&afs_outstanding_skbs) == -1) - BUG(); - rxrpc_kernel_data_delivered(skb); + rxrpc_kernel_data_consumed(call->rxcall, skb); } } @@ -489,9 +486,15 @@ static void afs_deliver_to_call(struct afs_call *call) last = rxrpc_kernel_is_data_last(skb); ret = call->type->deliver(call, skb, last); switch (ret) { + case -EAGAIN: + if (last) { + _debug("short data"); + goto unmarshal_error; + } + break; case 0: - if (last && - call->state == AFS_CALL_AWAIT_REPLY) + ASSERT(last); + if (call->state == AFS_CALL_AWAIT_REPLY) call->state = AFS_CALL_COMPLETE; break; case -ENOTCONN: @@ -501,6 +504,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code = RX_INVALID_OPERATION; goto do_abort; default: + unmarshal_error: abort_code = RXGEN_CC_UNMARSHAL; if (call->state != AFS_CALL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; @@ -511,9 +515,7 @@ static void afs_deliver_to_call(struct afs_call *call) call->state = AFS_CALL_ERROR; break; } - afs_data_delivered(skb); - skb = NULL; - continue; + break; case RXRPC_SKB_MARK_FINAL_ACK: _debug("Rcv ACK"); call->state = AFS_CALL_COMPLETE; @@ -685,15 +687,35 @@ static void afs_process_async_call(struct afs_call *call) } /* - * empty a socket buffer into a flat reply buffer + * Empty a socket buffer into a flat reply buffer. */ -void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb) +int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last) { size_t len = skb->len; - if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, len) < 0) - BUG(); - call->reply_size += len; + if (len > call->reply_max - call->reply_size) { + _leave(" = -EBADMSG [%zu > %u]", + len, call->reply_max - call->reply_size); + return -EBADMSG; + } + + if (len > 0) { + if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, + len) < 0) + BUG(); + call->reply_size += len; + } + + afs_data_consumed(call, skb); + if (!last) + return -EAGAIN; + + if (call->reply_size != call->reply_max) { + _leave(" = -EBADMSG [%u != %u]", + call->reply_size, call->reply_max); + return -EBADMSG; + } + return 0; } /* @@ -745,7 +767,8 @@ static void afs_collect_incoming_call(struct work_struct *work) } /* - * grab the operation ID from an incoming cache manager call + * Grab the operation ID from an incoming cache manager call. The socket + * buffer is discarded on error or if we don't yet have sufficient data. */ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, bool last) @@ -766,12 +789,9 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, call->offset += len; if (call->offset < 4) { - if (last) { - _leave(" = -EBADMSG [op ID short]"); - return -EBADMSG; - } - _leave(" = 0 [incomplete]"); - return 0; + afs_data_consumed(call, skb); + _leave(" = -EAGAIN"); + return -EAGAIN; } call->state = AFS_CALL_AWAIT_REQUEST; @@ -855,7 +875,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) } /* - * extract a piece of data from the received data socket buffers + * Extract a piece of data from the received data socket buffers. */ int afs_extract_data(struct afs_call *call, struct sk_buff *skb, bool last, void *buf, size_t count) @@ -873,10 +893,7 @@ int afs_extract_data(struct afs_call *call, struct sk_buff *skb, call->offset += len; if (call->offset < count) { - if (last) { - _leave(" = -EBADMSG [%d < %zu]", call->offset, count); - return -EBADMSG; - } + afs_data_consumed(call, skb); _leave(" = -EAGAIN"); return -EAGAIN; } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 340afd0..f94d1ab 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -64,16 +64,13 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call, struct afs_cache_vlocation *entry; __be32 *bp; u32 tmp; - int loop; + int loop, ret; _enter(",,%u", last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ entry = call->reply; diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index ac1bc3c..7b0f886 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -40,12 +40,12 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, unsigned long, gfp_t); int rxrpc_kernel_send_data(struct rxrpc_call *, struct msghdr *, size_t); +void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_kernel_abort_call(struct rxrpc_call *, u32); void rxrpc_kernel_end_call(struct rxrpc_call *); bool rxrpc_kernel_is_data_last(struct sk_buff *); u32 rxrpc_kernel_get_abort_code(struct sk_buff *); int rxrpc_kernel_get_error_number(struct sk_buff *); -void rxrpc_kernel_data_delivered(struct sk_buff *); void rxrpc_kernel_free_skb(struct sk_buff *); struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long); int rxrpc_kernel_reject_call(struct socket *); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1bb9e7a..ff83fb1 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -425,6 +425,7 @@ struct rxrpc_call { spinlock_t lock; rwlock_t state_lock; /* lock for state transition */ atomic_t usage; + atomic_t skb_count; /* Outstanding packets on this call */ atomic_t sequence; /* Tx data packet sequence counter */ u32 local_abort; /* local abort code */ u32 remote_abort; /* remote abort code */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 0b28321..9bae21e 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -130,6 +130,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local, call->state = RXRPC_CALL_SERVER_ACCEPTING; list_add_tail(&call->accept_link, &rx->acceptq); rxrpc_get_call(call); + atomic_inc(&call->skb_count); nsp = rxrpc_skb(notification); nsp->call = call; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index fc32aa5..f5e9916 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -460,6 +460,7 @@ static void rxrpc_insert_oos_packet(struct rxrpc_call *call, ASSERTCMP(sp->call, ==, NULL); sp->call = call; rxrpc_get_call(call); + atomic_inc(&call->skb_count); /* insert into the buffer in sequence order */ spin_lock_bh(&call->lock); @@ -734,6 +735,7 @@ all_acked: skb->mark = RXRPC_SKB_MARK_FINAL_ACK; sp->call = call; rxrpc_get_call(call); + atomic_inc(&call->skb_count); spin_lock_bh(&call->lock); if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0) BUG(); @@ -793,6 +795,7 @@ static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error, sp->error = error; sp->call = call; rxrpc_get_call(call); + atomic_inc(&call->skb_count); spin_lock_bh(&call->lock); ret = rxrpc_queue_rcv_skb(call, skb, true, fatal); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 91287c9..c47f14f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -491,13 +491,6 @@ void rxrpc_release_call(struct rxrpc_call *call) spin_lock_bh(&call->lock); while ((skb = skb_dequeue(&call->rx_queue)) || (skb = skb_dequeue(&call->rx_oos_queue))) { - sp = rxrpc_skb(skb); - if (sp->call) { - ASSERTCMP(sp->call, ==, call); - rxrpc_put_call(call); - sp->call = NULL; - } - skb->destructor = NULL; spin_unlock_bh(&call->lock); _debug("- zap %s %%%u #%u", @@ -605,6 +598,7 @@ void __rxrpc_put_call(struct rxrpc_call *call) if (atomic_dec_and_test(&call->usage)) { _debug("call %d dead", call->debug_id); + WARN_ON(atomic_read(&call->skb_count) != 0); ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); rxrpc_queue_work(&call->destroyer); } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 991a20d..9e0f58e 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -55,9 +55,6 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) { _debug("already terminated"); ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE); - skb->destructor = NULL; - sp->call = NULL; - rxrpc_put_call(call); rxrpc_free_skb(skb); return 0; } @@ -111,13 +108,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, ret = 0; out: - /* release the socket buffer */ - if (skb) { - skb->destructor = NULL; - sp->call = NULL; - rxrpc_put_call(call); - rxrpc_free_skb(skb); - } + rxrpc_free_skb(skb); _leave(" = %d", ret); return ret; @@ -200,6 +191,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, sp->call = call; rxrpc_get_call(call); + atomic_inc(&call->skb_count); terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) && !(sp->hdr.flags & RXRPC_CLIENT_INITIATED)); ret = rxrpc_queue_rcv_skb(call, skb, false, terminal); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index a3fa2ed..9ed66d5 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -203,6 +203,9 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, } /* we transferred the whole data packet */ + if (!(flags & MSG_PEEK)) + rxrpc_kernel_data_consumed(call, skb); + if (sp->hdr.flags & RXRPC_LAST_PACKET) { _debug("last"); if (rxrpc_conn_is_client(call->conn)) { @@ -360,28 +363,6 @@ wait_error: } /** - * rxrpc_kernel_data_delivered - Record delivery of data message - * @skb: Message holding data - * - * Record the delivery of a data message. This permits RxRPC to keep its - * tracking correct. The socket buffer will be deleted. - */ -void rxrpc_kernel_data_delivered(struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_call *call = sp->call; - - ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv); - ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1); - call->rx_data_recv = sp->hdr.seq; - - ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); - rxrpc_free_skb(skb); -} - -EXPORT_SYMBOL(rxrpc_kernel_data_delivered); - -/** * rxrpc_kernel_is_data_last - Determine if data message is last one * @skb: Message holding data * diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index eee0cfd9..06c51d4 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -98,11 +98,39 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call, spin_unlock_bh(&call->lock); } +/** + * rxrpc_kernel_data_consumed - Record consumption of data message + * @call: The call to which the message pertains. + * @skb: Message holding data + * + * Record the consumption of a data message and generate an ACK if appropriate. + * The call state is shifted if this was the final packet. The caller must be + * in process context with no spinlocks held. + * + * TODO: Actually generate the ACK here rather than punting this to the + * workqueue. + */ +void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + _enter("%d,%p{%u}", call->debug_id, skb, sp->hdr.seq); + + ASSERTCMP(sp->call, ==, call); + ASSERTCMP(sp->hdr.type, ==, RXRPC_PACKET_TYPE_DATA); + + /* TODO: Fix the sequence number tracking */ + ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv); + ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1); + ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); + + call->rx_data_recv = sp->hdr.seq; + rxrpc_hard_ACK_data(call, sp); +} +EXPORT_SYMBOL(rxrpc_kernel_data_consumed); + /* - * destroy a packet that has an RxRPC control buffer - * - advance the hard-ACK state of the parent call (done here in case something - * in the kernel bypasses recvmsg() and steals the packet directly off of the - * socket receive queue) + * Destroy a packet that has an RxRPC control buffer */ void rxrpc_packet_destructor(struct sk_buff *skb) { @@ -112,9 +140,8 @@ void rxrpc_packet_destructor(struct sk_buff *skb) _enter("%p{%p}", skb, call); if (call) { - /* send the final ACK on a client call */ - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) - rxrpc_hard_ACK_data(call, sp); + if (atomic_dec_return(&call->skb_count) < 0) + BUG(); rxrpc_put_call(call); sp->call = NULL; } -- cgit v0.10.2 From 94d9f1c5906b20053efe375b6d66610bca4b8b64 Mon Sep 17 00:00:00 2001 From: David Forster Date: Wed, 3 Aug 2016 15:13:01 +0100 Subject: ipv4: panic in leaf_walk_rcu due to stale node pointer Panic occurs when issuing "cat /proc/net/route" whilst populating FIB with > 1M routes. Use of cached node pointer in fib_route_get_idx is unsafe. BUG: unable to handle kernel paging request at ffffc90001630024 IP: [] leaf_walk_rcu+0x10/0xe0 PGD 11b08d067 PUD 11b08e067 PMD dac4b067 PTE 0 Oops: 0000 [#1] SMP Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscac snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep virti acpi_cpufreq button parport_pc ppdev lp parport autofs4 ext4 crc16 mbcache jbd tio_ring virtio floppy uhci_hcd ehci_hcd usbcore usb_common libata scsi_mod CPU: 1 PID: 785 Comm: cat Not tainted 4.2.0-rc8+ #4 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 task: ffff8800da1c0bc0 ti: ffff88011a05c000 task.ti: ffff88011a05c000 RIP: 0010:[] [] leaf_walk_rcu+0x10/0xe0 RSP: 0018:ffff88011a05fda0 EFLAGS: 00010202 RAX: ffff8800d8a40c00 RBX: ffff8800da4af940 RCX: ffff88011a05ff20 RDX: ffffc90001630020 RSI: 0000000001013531 RDI: ffff8800da4af950 RBP: 0000000000000000 R08: ffff8800da1f9a00 R09: 0000000000000000 R10: ffff8800db45b7e4 R11: 0000000000000246 R12: ffff8800da4af950 R13: ffff8800d97a74c0 R14: 0000000000000000 R15: ffff8800d97a7480 FS: 00007fd3970e0700(0000) GS:ffff88011fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffffc90001630024 CR3: 000000011a7e4000 CR4: 00000000000006e0 Stack: ffffffff814d00d3 0000000000000000 ffff88011a05ff20 ffff8800da1f9a00 ffffffff811dd8b9 0000000000000800 0000000000020000 00007fd396f35000 ffffffff811f8714 0000000000003431 ffffffff8138dce0 0000000000000f80 Call Trace: [] ? fib_route_seq_start+0x93/0xc0 [] ? seq_read+0x149/0x380 [] ? fsnotify+0x3b4/0x500 [] ? process_echoes+0x70/0x70 [] ? proc_reg_read+0x47/0x70 [] ? __vfs_read+0x23/0xd0 [] ? rw_verify_area+0x52/0xf0 [] ? vfs_read+0x81/0x120 [] ? SyS_read+0x42/0xa0 [] ? entry_SYSCALL_64_fastpath+0x16/0x75 Code: 48 85 c0 75 d8 f3 c3 31 c0 c3 f3 c3 66 66 66 66 66 66 2e 0f 1f 84 00 00 a 04 89 f0 33 02 44 89 c9 48 d3 e8 0f b6 4a 05 49 89 RIP [] leaf_walk_rcu+0x10/0xe0 RSP CR2: ffffc90001630024 Signed-off-by: Dave Forster Acked-by: Alexander Duyck Signed-off-by: David S. Miller diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d07fc07..febca0f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2452,9 +2452,7 @@ struct fib_route_iter { static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) { - struct fib_table *tb = iter->main_tb; struct key_vector *l, **tp = &iter->tnode; - struct trie *t; t_key key; /* use cache location of next-to-find key */ @@ -2462,8 +2460,6 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, pos -= iter->pos; key = iter->key; } else { - t = (struct trie *)tb->tb_data; - iter->tnode = t->kv; iter->pos = 0; key = 0; } @@ -2504,12 +2500,12 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) return NULL; iter->main_tb = tb; + t = (struct trie *)tb->tb_data; + iter->tnode = t->kv; if (*pos != 0) return fib_route_get_idx(iter, *pos); - t = (struct trie *)tb->tb_data; - iter->tnode = t->kv; iter->pos = 0; iter->key = 0; -- cgit v0.10.2 From 5e3b724e2767fb6495df1dcccaf7c79585c78ae9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 3 Aug 2016 21:42:18 +0200 Subject: net: dsa: b53: Add missing ULL suffix for 64-bit constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32-bit (e.g. with m68k-linux-gnu-gcc-4.1): drivers/net/dsa/b53/b53_common.c: In function ‘b53_arl_read’: drivers/net/dsa/b53/b53_common.c:1072: warning: integer constant is too large for ‘long’ type Fixes: 1da6df85c6fbed8f ("net: dsa: b53: Implement ARL add/del/dump operations") Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 8f12bdd..a0b453e 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -258,7 +258,7 @@ * BCM5325 and BCM5365 share most definitions below */ #define B53_ARLTBL_MAC_VID_ENTRY(n) (0x10 * (n)) -#define ARLTBL_MAC_MASK 0xffffffffffff +#define ARLTBL_MAC_MASK 0xffffffffffffULL #define ARLTBL_VID_S 48 #define ARLTBL_VID_MASK_25 0xff #define ARLTBL_VID_MASK 0xfff -- cgit v0.10.2 From d9dd26b20cff88b45d861ec786d86b1c9bd2ee60 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sat, 30 Jul 2016 10:05:31 -0700 Subject: MD: hold mddev lock to change bitmap location Changing the location changes a lot of things. Holding the lock to avoid race. This makes the .quiesce called with mddev lock hold too. Acked-by: NeilBrown Signed-off-by: Shaohua Li diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 6fff794..13041ee 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -2183,19 +2183,29 @@ location_show(struct mddev *mddev, char *page) static ssize_t location_store(struct mddev *mddev, const char *buf, size_t len) { + int rv; + rv = mddev_lock(mddev); + if (rv) + return rv; if (mddev->pers) { - if (!mddev->pers->quiesce) - return -EBUSY; - if (mddev->recovery || mddev->sync_thread) - return -EBUSY; + if (!mddev->pers->quiesce) { + rv = -EBUSY; + goto out; + } + if (mddev->recovery || mddev->sync_thread) { + rv = -EBUSY; + goto out; + } } if (mddev->bitmap || mddev->bitmap_info.file || mddev->bitmap_info.offset) { /* bitmap already configured. Only option is to clear it */ - if (strncmp(buf, "none", 4) != 0) - return -EBUSY; + if (strncmp(buf, "none", 4) != 0) { + rv = -EBUSY; + goto out; + } if (mddev->pers) { mddev->pers->quiesce(mddev, 1); bitmap_destroy(mddev); @@ -2214,21 +2224,25 @@ location_store(struct mddev *mddev, const char *buf, size_t len) /* nothing to be done */; else if (strncmp(buf, "file:", 5) == 0) { /* Not supported yet */ - return -EINVAL; + rv = -EINVAL; + goto out; } else { - int rv; if (buf[0] == '+') rv = kstrtoll(buf+1, 10, &offset); else rv = kstrtoll(buf, 10, &offset); if (rv) - return rv; - if (offset == 0) - return -EINVAL; + goto out; + if (offset == 0) { + rv = -EINVAL; + goto out; + } if (mddev->bitmap_info.external == 0 && mddev->major_version == 0 && - offset != mddev->bitmap_info.default_offset) - return -EINVAL; + offset != mddev->bitmap_info.default_offset) { + rv = -EINVAL; + goto out; + } mddev->bitmap_info.offset = offset; if (mddev->pers) { struct bitmap *bitmap; @@ -2245,7 +2259,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len) mddev->pers->quiesce(mddev, 0); if (rv) { bitmap_destroy(mddev); - return rv; + goto out; } } } @@ -2257,6 +2271,11 @@ location_store(struct mddev *mddev, const char *buf, size_t len) set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); } + rv = 0; +out: + mddev_unlock(mddev); + if (rv) + return rv; return len; } -- cgit v0.10.2 From 11367799f3d12a5074c4a3c0fa4ea8da2a21a2a4 Mon Sep 17 00:00:00 2001 From: Alexey Obitotskiy Date: Wed, 3 Aug 2016 10:02:56 +0200 Subject: md: Prevent IO hold during accessing to faulty raid5 array After array enters in faulty state (e.g. number of failed drives becomes more then accepted for raid5 level) it sets error flags (one of this flags is MD_CHANGE_PENDING). For internal metadata arrays MD_CHANGE_PENDING cleared into md_update_sb, but not for external metadata arrays. MD_CHANGE_PENDING flag set prevents to finish all new or non-finished IOs to array and hold them in pending state. In some cases this can leads to deadlock situation. For example, we have faulty array (2 of 4 drives failed) and udev handle array state changes and blkid started (or other userspace application that used array to read/write) but unable to finish reads due to IO hold. At the same time we unable to get exclusive access to array (to stop array in our case) because another external application still use this array. Fix makes possible to return IO with errors immediately. So external application can finish working with array and give exclusive access to other applications to perform required management actions with array. Signed-off-by: Alexey Obitotskiy Signed-off-by: Shaohua Li diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e379b89..4f8f524 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4640,7 +4640,9 @@ finish: } if (!bio_list_empty(&s.return_bi)) { - if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) { + if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags) && + (s.failed <= conf->max_degraded || + conf->mddev->external == 0)) { spin_lock_irq(&conf->device_lock); bio_list_merge(&conf->return_bi, &s.return_bi); spin_unlock_irq(&conf->device_lock); -- cgit v0.10.2 From a6ed3ea65d9868fdf9eff84e6fe4f666b8d14b02 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 5 Aug 2016 14:01:27 -0700 Subject: bpf: restore behavior of bpf_map_update_elem The introduction of pre-allocated hash elements inadvertently broke the behavior of bpf hash maps where users expected to call bpf_map_update_elem() without considering that the map can be full. Some programs do: old_value = bpf_map_lookup_elem(map, key); if (old_value) { ... prepare new_value on stack ... bpf_map_update_elem(map, key, new_value); } Before pre-alloc the update() for existing element would work even in 'map full' condition. Restore this behavior. The above program could have updated old_value in place instead of update() which would be faster and most programs use that approach, but sometimes the values are large and the programs use update() helper to do atomic replacement of the element. Note we cannot simply update element's value in-place like percpu hash map does and have to allocate extra num_possible_cpu elements and use this extra reserve when the map is full. Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index fff3650..570eeca 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -26,11 +26,18 @@ struct bpf_htab { struct bucket *buckets; void *elems; struct pcpu_freelist freelist; + void __percpu *extra_elems; atomic_t count; /* number of elements in this hashtable */ u32 n_buckets; /* number of hash buckets */ u32 elem_size; /* size of each element in bytes */ }; +enum extra_elem_state { + HTAB_NOT_AN_EXTRA_ELEM = 0, + HTAB_EXTRA_ELEM_FREE, + HTAB_EXTRA_ELEM_USED +}; + /* each htab element is struct htab_elem + key + value */ struct htab_elem { union { @@ -38,7 +45,10 @@ struct htab_elem { struct bpf_htab *htab; struct pcpu_freelist_node fnode; }; - struct rcu_head rcu; + union { + struct rcu_head rcu; + enum extra_elem_state state; + }; u32 hash; char key[0] __aligned(8); }; @@ -113,6 +123,23 @@ free_elems: return err; } +static int alloc_extra_elems(struct bpf_htab *htab) +{ + void __percpu *pptr; + int cpu; + + pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN); + if (!pptr) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + ((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state = + HTAB_EXTRA_ELEM_FREE; + } + htab->extra_elems = pptr; + return 0; +} + /* Called from syscall */ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) { @@ -185,6 +212,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) if (percpu) cost += (u64) round_up(htab->map.value_size, 8) * num_possible_cpus() * htab->map.max_entries; + else + cost += (u64) htab->elem_size * num_possible_cpus(); if (cost >= U32_MAX - PAGE_SIZE) /* make sure page count doesn't overflow */ @@ -212,14 +241,22 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) raw_spin_lock_init(&htab->buckets[i].lock); } + if (!percpu) { + err = alloc_extra_elems(htab); + if (err) + goto free_buckets; + } + if (!(attr->map_flags & BPF_F_NO_PREALLOC)) { err = prealloc_elems_and_freelist(htab); if (err) - goto free_buckets; + goto free_extra_elems; } return &htab->map; +free_extra_elems: + free_percpu(htab->extra_elems); free_buckets: kvfree(htab->buckets); free_htab: @@ -349,7 +386,6 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) free_percpu(htab_elem_get_ptr(l, htab->map.key_size)); kfree(l); - } static void htab_elem_free_rcu(struct rcu_head *head) @@ -370,6 +406,11 @@ static void htab_elem_free_rcu(struct rcu_head *head) static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) { + if (l->state == HTAB_EXTRA_ELEM_USED) { + l->state = HTAB_EXTRA_ELEM_FREE; + return; + } + if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) { pcpu_freelist_push(&htab->freelist, &l->fnode); } else { @@ -381,25 +422,44 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, void *value, u32 key_size, u32 hash, - bool percpu, bool onallcpus) + bool percpu, bool onallcpus, + bool old_elem_exists) { u32 size = htab->map.value_size; bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC); struct htab_elem *l_new; void __percpu *pptr; + int err = 0; if (prealloc) { l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist); if (!l_new) - return ERR_PTR(-E2BIG); + err = -E2BIG; } else { if (atomic_inc_return(&htab->count) > htab->map.max_entries) { atomic_dec(&htab->count); - return ERR_PTR(-E2BIG); + err = -E2BIG; + } else { + l_new = kmalloc(htab->elem_size, + GFP_ATOMIC | __GFP_NOWARN); + if (!l_new) + return ERR_PTR(-ENOMEM); } - l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN); - if (!l_new) - return ERR_PTR(-ENOMEM); + } + + if (err) { + if (!old_elem_exists) + return ERR_PTR(err); + + /* if we're updating the existing element and the hash table + * is full, use per-cpu extra elems + */ + l_new = this_cpu_ptr(htab->extra_elems); + if (l_new->state != HTAB_EXTRA_ELEM_FREE) + return ERR_PTR(-E2BIG); + l_new->state = HTAB_EXTRA_ELEM_USED; + } else { + l_new->state = HTAB_NOT_AN_EXTRA_ELEM; } memcpy(l_new->key, key, key_size); @@ -489,7 +549,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, if (ret) goto err; - l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false); + l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, + !!l_old); if (IS_ERR(l_new)) { /* all pre-allocated elements are in use or memory exhausted */ ret = PTR_ERR(l_new); @@ -563,7 +624,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, } } else { l_new = alloc_htab_elem(htab, key, value, key_size, - hash, true, onallcpus); + hash, true, onallcpus, false); if (IS_ERR(l_new)) { ret = PTR_ERR(l_new); goto err; @@ -652,6 +713,7 @@ static void htab_map_free(struct bpf_map *map) htab_free_elems(htab); pcpu_freelist_destroy(&htab->freelist); } + free_percpu(htab->extra_elems); kvfree(htab->buckets); kfree(htab); } -- cgit v0.10.2 From ba0cc3c153590e3d31985b8f8914d205a20b0d7a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 5 Aug 2016 14:01:28 -0700 Subject: samples/bpf: add bpf_map_update_elem() tests increase test coverage to check previously missing 'update when full' Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c index 47bf085..cce2b59 100644 --- a/samples/bpf/test_maps.c +++ b/samples/bpf/test_maps.c @@ -68,7 +68,16 @@ static void test_hashmap_sanity(int i, void *data) assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && errno == E2BIG); + /* update existing element, thought the map is full */ + key = 1; + assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0); + key = 2; + assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0); + key = 1; + assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0); + /* check that key = 0 doesn't exist */ + key = 0; assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT); /* iterate over two elements */ @@ -413,10 +422,12 @@ static void do_work(int fn, void *data) for (i = fn; i < MAP_SIZE; i += TASKS) { key = value = i; - if (do_update) + if (do_update) { assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0); - else + assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0); + } else { assert(bpf_delete_elem(map_fd, &key) == 0); + } } } -- cgit v0.10.2 From c518189567eaf42b2ec50a4d982484c8e38799f8 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Fri, 5 Aug 2016 10:31:58 +0530 Subject: net: macb: Correct CAPS mask USRIO and JUMBO CAPS have the same mask. Fix the same. Fixes: ce721a702197 ("net: ethernet: cadence-macb: Add disabled usrio caps") Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Harini Katakam Acked-by: Nicolas Ferre Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 36893d8..b6fcf10 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -403,11 +403,11 @@ #define MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII 0x00000004 #define MACB_CAPS_NO_GIGABIT_HALF 0x00000008 #define MACB_CAPS_USRIO_DISABLED 0x00000010 +#define MACB_CAPS_JUMBO 0x00000020 #define MACB_CAPS_FIFO_MODE 0x10000000 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 #define MACB_CAPS_SG_DISABLED 0x40000000 #define MACB_CAPS_MACB_IS_GEM 0x80000000 -#define MACB_CAPS_JUMBO 0x00000010 /* Bit manipulation macros */ #define MACB_BIT(name) \ -- cgit v0.10.2 From 707e6835f89419ff1c05e828c2d9939fd95a7ad8 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 23 Jul 2016 22:16:56 +0800 Subject: netfilter: nf_ct_h323: do not re-activate already expired timer Commit 96d1327ac2e3 ("netfilter: h323: Use mod_timer instead of set_expect_timeout") just simplify the source codes if (!del_timer(&exp->timeout)) return 0; add_timer(&exp->timeout); to mod_timer(&exp->timeout, jiffies + info->timeout * HZ); This is not correct, and introduce a race codition: CPU0 CPU1 - timer expire process_rcf expectation_timed_out lock(exp_lock) - find_exp waiting exp_lock... re-activate timer!! waiting exp_lock... unlock(exp_lock) lock(exp_lock) - unlink expect - free(expect) - unlock(exp_lock) So when the timer expires again, we will access the memory that was already freed. Replace mod_timer with mod_timer_pending here to fix this problem. Fixes: 96d1327ac2e3 ("netfilter: h323: Use mod_timer instead of set_expect_timeout") Cc: Gao Feng Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index bb77a97..5c0db5c 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -1473,7 +1473,8 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, "timeout to %u seconds for", info->timeout); nf_ct_dump_tuple(&exp->tuple); - mod_timer(&exp->timeout, jiffies + info->timeout * HZ); + mod_timer_pending(&exp->timeout, + jiffies + info->timeout * HZ); } spin_unlock_bh(&nf_conntrack_expect_lock); } -- cgit v0.10.2 From 2c86943c20e375b0fe562af0626f2e5461d8d203 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 24 Jul 2016 19:53:19 +0200 Subject: netfilter: nf_tables: s/MFT_REG32_01/NFT_REG32_01 MFT_REG32_01 is a typo, rename this to NFT_REG32_01. Signed-off-by: Pablo Neira Ayuso diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 01751fa..c674ba2 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -24,7 +24,7 @@ enum nft_registers { __NFT_REG_MAX, NFT_REG32_00 = 8, - MFT_REG32_01, + NFT_REG32_01, NFT_REG32_02, NFT_REG32_03, NFT_REG32_04, -- cgit v0.10.2 From 925baf394bb0e65d85330afe467393f66f910353 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Wed, 27 Jul 2016 08:41:49 +0300 Subject: tools/gpio: fix gpio-event-mon header comment Fixes: 97f69747d8b1 ('tools/gpio: add the gpio-event-mon tool') Signed-off-by: Baruch Siach Signed-off-by: Linus Walleij diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 448ed96..1c14c25 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -1,5 +1,5 @@ /* - * gpio-hammer - example swiss army knife to shake GPIO lines on a system + * gpio-event-mon - monitor GPIO line events from userspace * * Copyright (C) 2016 Linus Walleij * -- cgit v0.10.2 From 626d2f07de89bf6be3d7301524d0ab3375b81b9c Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 4 Aug 2016 19:59:41 +0900 Subject: dmaengine: usb-dmac: check CHCR.DE bit in usb_dmac_isr_channel() The USB-DMAC's interruption happens even if the CHCR.DE is not set to 1 because CHCR.NULLE is set to 1. So, this driver should call usb_dmac_isr_transfer_end() if the DE bit is set to 1 only. Otherwise, the desc is possible to be NULL in the usb_dmac_isr_transfer_end(). Fixes: 0c1c8ff32fa2 ("dmaengine: usb-dmac: Add Renesas USB DMA Controller (USB-DMAC) driver) Cc: # v4.1+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Vinod Koul diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c index 749f1bd..06ecdc3 100644 --- a/drivers/dma/sh/usb-dmac.c +++ b/drivers/dma/sh/usb-dmac.c @@ -600,27 +600,30 @@ static irqreturn_t usb_dmac_isr_channel(int irq, void *dev) { struct usb_dmac_chan *chan = dev; irqreturn_t ret = IRQ_NONE; - u32 mask = USB_DMACHCR_TE; - u32 check_bits = USB_DMACHCR_TE | USB_DMACHCR_SP; + u32 mask = 0; u32 chcr; + bool xfer_end = false; spin_lock(&chan->vc.lock); chcr = usb_dmac_chan_read(chan, USB_DMACHCR); - if (chcr & check_bits) - mask |= USB_DMACHCR_DE | check_bits; + if (chcr & (USB_DMACHCR_TE | USB_DMACHCR_SP)) { + mask |= USB_DMACHCR_DE | USB_DMACHCR_TE | USB_DMACHCR_SP; + if (chcr & USB_DMACHCR_DE) + xfer_end = true; + ret |= IRQ_HANDLED; + } if (chcr & USB_DMACHCR_NULL) { /* An interruption of TE will happen after we set FTE */ mask |= USB_DMACHCR_NULL; chcr |= USB_DMACHCR_FTE; ret |= IRQ_HANDLED; } - usb_dmac_chan_write(chan, USB_DMACHCR, chcr & ~mask); + if (mask) + usb_dmac_chan_write(chan, USB_DMACHCR, chcr & ~mask); - if (chcr & check_bits) { + if (xfer_end) usb_dmac_isr_transfer_end(chan); - ret |= IRQ_HANDLED; - } spin_unlock(&chan->vc.lock); -- cgit v0.10.2 From c1eda3c6394f805886b2afa8c7ea5e04305ec698 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 1 Aug 2016 13:13:08 +0200 Subject: netfilter: nft_rbtree: ignore inactive matching element with no descendants If we find a matching element that is inactive with no descendants, we jump to the found label, then crash because of nul-dereference on the left branch. Fix this by checking that the element is active and not an interval end and skipping the logic that only applies to the tree iteration. Signed-off-by: Pablo Neira Ayuso Tested-by: Anders K. Pedersen diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 6473936..ffe9ae0 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -70,7 +70,6 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, } else if (d > 0) parent = parent->rb_right; else { -found: if (!nft_set_elem_active(&rbe->ext, genmask)) { parent = parent->rb_left; continue; @@ -84,9 +83,12 @@ found: } } - if (set->flags & NFT_SET_INTERVAL && interval != NULL) { - rbe = interval; - goto found; + if (set->flags & NFT_SET_INTERVAL && interval != NULL && + nft_set_elem_active(&interval->ext, genmask) && + !nft_rbtree_interval_end(interval)) { + spin_unlock_bh(&nft_rbtree_lock); + *ext = &interval->ext; + return true; } out: spin_unlock_bh(&nft_rbtree_lock); -- cgit v0.10.2 From cf1b18030de29e4e5b0a57695ae5db4a89da0ff7 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Sun, 24 Jul 2016 13:53:30 +0200 Subject: USB: serial: option: add D-Link DWM-156/A3 The device has four interfaces; the three serial ports ought to be handled by this driver: 00 Diagnostic interface serial port 01 NMEA device serial port 02 Mass storage (sd card) 03 Modem serial port The other product ids listed in the Windows driver are present already. Signed-off-by: Lubomir Rintel Cc: stable Signed-off-by: Johan Hovold diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 8e07536..0338851 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1966,6 +1966,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) }, { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) }, -- cgit v0.10.2 From 0d35d0815b19216f852f257afe420f7c7d182780 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 3 Aug 2016 12:41:40 +0200 Subject: netfilter: nf_conntrack_sip: CSeq 0 is a valid CSeq Do not drop packet when CSeq is 0 as 0 is also a valid value for CSeq. simple_strtoul() will return 0 either when all digits are 0 or if there are no digits at all. Therefore when simple_strtoul() returns 0 we check if first character is digit 0 or not. Signed-off-by: Christophe Leroy Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 8d9db9d..7d77217 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1383,7 +1383,7 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff, return NF_DROP; } cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq) { + if (!cseq && *(*dptr + matchoff) != '0') { nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; } @@ -1446,7 +1446,7 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, return NF_DROP; } cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq) { + if (!cseq && *(*dptr + matchoff) != '0') { nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; } -- cgit v0.10.2 From 6977495c06f7f47636a076ee5a0ca571279d9697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20Deli=C3=ABn?= Date: Thu, 28 Jul 2016 18:52:55 +0000 Subject: USB: serial: ftdi_sio: add PIDs for Ivium Technologies devices Ivium Technologies uses the FTDI VID with custom PIDs for their line of electrochemical interfaces and the PalmSens they developed for PalmSens BV. Signed-off-by: Robert Delien Cc: stable Signed-off-by: Johan Hovold diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 0082080..8962cdc 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -648,6 +648,8 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_ELV_TFD128_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FM3RX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS777_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_PALMSENS_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_IVIUM_XSTAT_PID) }, { USB_DEVICE(FTDI_VID, LINX_SDMUSBQSS_PID) }, { USB_DEVICE(FTDI_VID, LINX_MASTERDEVEL2_PID) }, { USB_DEVICE(FTDI_VID, LINX_FUTURE_0_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index c5d6c1e..067e3a6 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -406,6 +406,12 @@ #define FTDI_4N_GALAXY_DE_3_PID 0xF3C2 /* + * Ivium Technologies product IDs + */ +#define FTDI_PALMSENS_PID 0xf440 +#define FTDI_IVIUM_XSTAT_PID 0xf441 + +/* * Linx Technologies product ids */ #define LINX_SDMUSBQSS_PID 0xF448 /* Linx SDM-USB-QS-S */ -- cgit v0.10.2 From ae34d12cc1e212ffcd92e069030e54dae69c832f Mon Sep 17 00:00:00 2001 From: "Sheng-Hui J. Chu" Date: Thu, 28 Jul 2016 17:01:45 -0400 Subject: USB: serial: ftdi_sio: add device ID for WICED USB UART dev board BCM20706V2_EVAL is a WICED dev board designed with FT2232H USB 2.0 UART/FIFO IC. To support BCM920706V2_EVAL dev board for WICED development on Linux. Add the VID(0a5c) and PID(6422) to ftdi_sio driver to allow loading ftdi_sio for this board. Signed-off-by: Sheng-Hui J. Chu Cc: stable Signed-off-by: Johan Hovold diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 8962cdc..b2d767e 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1010,6 +1010,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) }, + { USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 067e3a6..f87a938 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -679,6 +679,12 @@ #define INTREPID_NEOVI_PID 0x0701 /* + * WICED USB UART + */ +#define WICED_VID 0x0A5C +#define WICED_USB20706V2_PID 0x6422 + +/* * Definitions for ID TECH (www.idt-net.com) devices */ #define IDTECH_VID 0x0ACD /* ID TECH Vendor ID */ -- cgit v0.10.2 From 01d7956b58e644ea0d2e8d9340c5727a8fc39d70 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Tue, 2 Aug 2016 11:29:25 +0200 Subject: USB: serial: option: add support for Telit LE920A4 This patch adds a set of compositions for Telit LE920A4. Compositions in short are: 0x1207: tty + tty 0x1208: tty + adb + tty + tty 0x1211: tty + adb + ecm 0x1212: tty + adb 0x1213: ecm + tty 0x1214: tty + adb + ecm + tty telit_le922_blacklist_usbcfg3 is reused for compositions 0x1211 and 0x1214 due to the same interfaces positions. Signed-off-by: Daniele Palmas Cc: stable Signed-off-by: Johan Hovold diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 0338851..bc47258 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -274,6 +274,12 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 #define TELIT_PRODUCT_LE910_USBCFG4 0x1206 +#define TELIT_PRODUCT_LE920A4_1207 0x1207 +#define TELIT_PRODUCT_LE920A4_1208 0x1208 +#define TELIT_PRODUCT_LE920A4_1211 0x1211 +#define TELIT_PRODUCT_LE920A4_1212 0x1212 +#define TELIT_PRODUCT_LE920A4_1213 0x1213 +#define TELIT_PRODUCT_LE920A4_1214 0x1214 /* ZTE PRODUCTS */ #define ZTE_VENDOR_ID 0x19d2 @@ -628,6 +634,11 @@ static const struct option_blacklist_info telit_le920_blacklist = { .reserved = BIT(1) | BIT(5), }; +static const struct option_blacklist_info telit_le920a4_blacklist_1 = { + .sendsetup = BIT(0), + .reserved = BIT(1), +}; + static const struct option_blacklist_info telit_le922_blacklist_usbcfg0 = { .sendsetup = BIT(2), .reserved = BIT(0) | BIT(1) | BIT(3), @@ -1203,6 +1214,16 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), .driver_info = (kernel_ulong_t)&telit_le920_blacklist }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1207) }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1208), + .driver_info = (kernel_ulong_t)&telit_le920a4_blacklist_1 }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1211), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1212), + .driver_info = (kernel_ulong_t)&telit_le920a4_blacklist_1 }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&net_intf1_blacklist }, -- cgit v0.10.2 From 647024a7df36014bbc4479d92d88e6b77c0afcf6 Mon Sep 17 00:00:00 2001 From: Alexey Klimov Date: Mon, 8 Aug 2016 02:34:46 +0100 Subject: USB: serial: fix memleak in driver-registration error path udriver struct allocated by kzalloc() will not be freed if usb_register() and next calls fail. This patch fixes this by adding one more step with kfree(udriver) in error path. Signed-off-by: Alexey Klimov Acked-by: Alan Stern Cc: stable Signed-off-by: Johan Hovold diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index b1b9bac..d213cf4 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -1433,7 +1433,7 @@ int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[] rc = usb_register(udriver); if (rc) - return rc; + goto failed_usb_register; for (sd = serial_drivers; *sd; ++sd) { (*sd)->usb_driver = udriver; @@ -1451,6 +1451,8 @@ int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[] while (sd-- > serial_drivers) usb_serial_deregister(*sd); usb_deregister(udriver); +failed_usb_register: + kfree(udriver); return rc; } EXPORT_SYMBOL_GPL(usb_serial_register_drivers); -- cgit v0.10.2 From 0956254a2d5b9e2141385514553aeef694dfe3b5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 8 Aug 2016 15:08:49 +0200 Subject: ovl: don't copy up opaqueness When a copy up of a directory occurs which has the opaque xattr set, the xattr remains in the upper directory. The immediate behavior with overlayfs is that the upper directory is not treated as opaque, however after a remount the opaque flag is used and upper directory is treated as opaque. This causes files created in the lower layer to be hidden when using multiple lower directories. Fix by not copying up the opaque flag. To reproduce: ----8<---------8<---------8<---------8<---------8<---------8<---- mkdir -p l/d/s u v w mnt mount -t overlay overlay -olowerdir=l,upperdir=u,workdir=w mnt rm -rf mnt/d/ mkdir -p mnt/d/n umount mnt mount -t overlay overlay -olowerdir=u:l,upperdir=v,workdir=w mnt touch mnt/d/foo umount mnt mount -t overlay overlay -olowerdir=u:l,upperdir=v,workdir=w mnt ls mnt/d ----8<---------8<---------8<---------8<---------8<---------8<---- output should be: "foo n" Reported-by: Derek McGowan Link: https://bugzilla.kernel.org/show_bug.cgi?id=151291 Signed-off-by: Miklos Szeredi Cc: diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 54e5d66..43fdc27 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -80,6 +80,8 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) } for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { + if (ovl_is_private_xattr(name)) + continue; retry: size = vfs_getxattr(old, name, value, value_size); if (size == -ERANGE) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 1b885c1..024352f 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -191,7 +191,7 @@ static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) return err; } -static bool ovl_is_private_xattr(const char *name) +bool ovl_is_private_xattr(const char *name) { #define OVL_XATTR_PRE_NAME OVL_XATTR_PREFIX "." return strncmp(name, OVL_XATTR_PRE_NAME, diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index e4f5c95..34839bd 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -193,6 +193,7 @@ int ovl_removexattr(struct dentry *dentry, const char *name); struct posix_acl *ovl_get_acl(struct inode *inode, int type); int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); +bool ovl_is_private_xattr(const char *name); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode); struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode); -- cgit v0.10.2 From bf47dc572aaf53c3f2311c71e03ec01be3131fd6 Mon Sep 17 00:00:00 2001 From: Sascha Silbe Date: Mon, 4 Jul 2016 17:16:47 +0200 Subject: s390: clarify compressed image code path The way the decompressor is hooked into the start-up code is rather subtle, with a mix of multiply-defined symbols and hardcoded address literals. Add some comments at the junction points to clarify how it works. Signed-off-by: Sascha Silbe Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S index f86a4ee..28c4f96 100644 --- a/arch/s390/boot/compressed/head.S +++ b/arch/s390/boot/compressed/head.S @@ -21,16 +21,21 @@ ENTRY(startup_continue) lg %r15,.Lstack-.LPG1(%r13) aghi %r15,-160 brasl %r14,decompress_kernel - # setup registers for memory mover & branch to target + # Set up registers for memory mover. We move the decompressed image to + # 0x11000, starting at offset 0x11000 in the decompressed image so + # that code living at 0x11000 in the image will end up at 0x11000 in + # memory. lgr %r4,%r2 lg %r2,.Loffset-.LPG1(%r13) la %r4,0(%r2,%r4) lg %r3,.Lmvsize-.LPG1(%r13) lgr %r5,%r3 - # move the memory mover someplace safe + # Move the memory mover someplace safe so it doesn't overwrite itself. la %r1,0x200 mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13) - # decompress image is started at 0x11000 + # When the memory mover is done we pass control to + # arch/s390/kernel/head64.S:startup_continue which lives at 0x11000 in + # the decompressed image. lgr %r6,%r2 br %r1 mover: diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 56e4d82..4431905 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -309,7 +309,9 @@ ENTRY(startup_kdump) l %r15,.Lstack-.LPG0(%r13) ahi %r15,-STACK_FRAME_OVERHEAD brasl %r14,verify_facilities - /* Continue with startup code in head64.S */ +# For uncompressed images, continue in +# arch/s390/kernel/head64.S. For compressed images, continue in +# arch/s390/boot/compressed/head.S. jg startup_continue .Lstack: -- cgit v0.10.2 From 134a24cd895c5d138d639a0741ad27e389cd4562 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 3 Aug 2016 17:55:00 +0200 Subject: s390/crc32-vx: Fix checksum calculation for small sizes The current prealign logic will fail for sizes < alignment, as the new datalen passed to the vector function is smaller than zero. Being a size_t this gets wrapped to a huge number causing memory overruns and wrong data. Let's add an early exit if the size is smaller than the minimal size with alignment. This will also avoid calling the software fallback twice for all sizes smaller than the minimum size (prealign + remaining) Signed-off-by: Christian Borntraeger Fixes: f848dbd3bc1a ("s390/crc32-vx: add crypto API module for optimized CRC-32 algorithms") Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c index 577ae1d..2bad9d8 100644 --- a/arch/s390/crypto/crc32-vx.c +++ b/arch/s390/crypto/crc32-vx.c @@ -51,6 +51,9 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); struct kernel_fpu vxstate; \ unsigned long prealign, aligned, remaining; \ \ + if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \ + return ___crc32_sw(crc, data, datalen); \ + \ if ((unsigned long)data & VX_ALIGN_MASK) { \ prealign = VX_ALIGNMENT - \ ((unsigned long)data & VX_ALIGN_MASK); \ @@ -59,9 +62,6 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); data = (void *)((unsigned long)data + prealign); \ } \ \ - if (datalen < VX_MIN_LEN) \ - return ___crc32_sw(crc, data, datalen); \ - \ aligned = datalen & ~VX_ALIGN_MASK; \ remaining = datalen & VX_ALIGN_MASK; \ \ -- cgit v0.10.2 From e2efc424549eb66d227dfe6d073d1789a24bc698 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 4 Aug 2016 10:24:30 +0200 Subject: s390/lib: fix memcmp and strstr if two string compare equal the clcle instruction will update the string addresses to point _after_ the string. This might already be on a different page, so we should not use these pointer to calculate the difference as in that case the calculation of the difference can cause oopses. The return value of memcmp does not need the difference, we can just reuse the condition code and return for CC=1 (All bytes compared, first operand low) -1 and for CC=2 (All bytes compared, first operand high) +1 strstr also does not need the diff. While fixing this, make the common function clcle "correct on its own" by using l1 instead of l2 for the first length. strstr will call this with l2 for both strings. Signed-off-by: Christian Borntraeger Fixes: db7f5eef3dc0 ("s390/lib: use basic blocks for inline assemblies") Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index e390bbb..48352bf 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -237,11 +237,10 @@ char * strrchr(const char * s, int c) EXPORT_SYMBOL(strrchr); static inline int clcle(const char *s1, unsigned long l1, - const char *s2, unsigned long l2, - int *diff) + const char *s2, unsigned long l2) { register unsigned long r2 asm("2") = (unsigned long) s1; - register unsigned long r3 asm("3") = (unsigned long) l2; + register unsigned long r3 asm("3") = (unsigned long) l1; register unsigned long r4 asm("4") = (unsigned long) s2; register unsigned long r5 asm("5") = (unsigned long) l2; int cc; @@ -252,7 +251,6 @@ static inline int clcle(const char *s1, unsigned long l1, " srl %0,28" : "=&d" (cc), "+a" (r2), "+a" (r3), "+a" (r4), "+a" (r5) : : "cc"); - *diff = *(char *)r2 - *(char *)r4; return cc; } @@ -270,9 +268,9 @@ char * strstr(const char * s1,const char * s2) return (char *) s1; l1 = __strend(s1) - s1; while (l1-- >= l2) { - int cc, dummy; + int cc; - cc = clcle(s1, l1, s2, l2, &dummy); + cc = clcle(s1, l2, s2, l2); if (!cc) return (char *) s1; s1++; @@ -313,11 +311,11 @@ EXPORT_SYMBOL(memchr); */ int memcmp(const void *cs, const void *ct, size_t n) { - int ret, diff; + int ret; - ret = clcle(cs, n, ct, n, &diff); + ret = clcle(cs, n, ct, n); if (ret) - ret = diff; + ret = ret == 1 ? -1 : 1; return ret; } EXPORT_SYMBOL(memcmp); -- cgit v0.10.2 From 6e127efeeae5bbbc323e028d96c43dec0cc7b1b8 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 4 Aug 2016 12:51:09 +0200 Subject: s390/config: make the vector optimized crc function builtin For all configs with CONFIG_BTRFS_FS = y we should also make the optimized crc module builtin. Otherwise early mounts will fall back to the software variant. Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 889ea34..26e0c7f 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -678,7 +678,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index 1bcfd76..24879da 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -616,7 +616,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 13ff090..a5c1e5f 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -615,7 +615,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/defconfig b/arch/s390/defconfig index ccccebe..73610f2 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -234,7 +234,7 @@ CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_CRC32_S390=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRC7=m # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_POWERPC is not set -- cgit v0.10.2 From 7de6a63ca2ff21ea67e50a546ca0e9bb6e2b0718 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 28 Jul 2016 20:26:37 +0200 Subject: s390/cio: stop using subchannel_id from ccw_device_private We want to get rid of the copy of struct subchannel_id maintained in ccw_device_private, so obtain it from the subchannel directly. Signed-off-by: Sebastian Ott Reviewed-by: Cornelia Huck Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c index 15b56a1..9bc3512 100644 --- a/drivers/s390/cio/device_status.c +++ b/drivers/s390/cio/device_status.c @@ -26,6 +26,7 @@ static void ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb) { + struct subchannel *sch = to_subchannel(cdev->dev.parent); char dbf_text[15]; if (!scsw_is_valid_cstat(&irb->scsw) || @@ -36,10 +37,10 @@ ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb) "received" " ... device %04x on subchannel 0.%x.%04x, dev_stat " ": %02X sch_stat : %02X\n", - cdev->private->dev_id.devno, cdev->private->schid.ssid, - cdev->private->schid.sch_no, + cdev->private->dev_id.devno, sch->schid.ssid, + sch->schid.sch_no, scsw_dstat(&irb->scsw), scsw_cstat(&irb->scsw)); - sprintf(dbf_text, "chk%x", cdev->private->schid.sch_no); + sprintf(dbf_text, "chk%x", sch->schid.sch_no); CIO_TRACE_EVENT(0, dbf_text); CIO_HEX_EVENT(0, irb, sizeof(struct irb)); } -- cgit v0.10.2 From 9080c92494f0d7e2bca1197bbddcc417117057c3 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 28 Jul 2016 20:30:31 +0200 Subject: s390/qdio: obtain subchannel_id via ccw_device_get_schid() We want to get rid of the copy of struct subchannel_id maintained in ccw_device_private, so obtain it using ccw_device_get_schid(). Signed-off-by: Sebastian Ott Reviewed-by: Cornelia Huck Reviewed-by: Ursula Braun Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 4bb5262f..94e9620 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1066,10 +1066,12 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) { struct qdio_irq *irq_ptr = cdev->private->qdio_data; + struct subchannel_id schid; int cstat, dstat; if (!intparm || !irq_ptr) { - DBF_ERROR("qint:%4x", cdev->private->schid.sch_no); + ccw_device_get_schid(cdev, &schid); + DBF_ERROR("qint:%4x", schid.sch_no); return; } @@ -1122,12 +1124,14 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, int qdio_get_ssqd_desc(struct ccw_device *cdev, struct qdio_ssqd_desc *data) { + struct subchannel_id schid; if (!cdev || !cdev->private) return -EINVAL; - DBF_EVENT("get ssqd:%4x", cdev->private->schid.sch_no); - return qdio_setup_get_ssqd(NULL, &cdev->private->schid, data); + ccw_device_get_schid(cdev, &schid); + DBF_EVENT("get ssqd:%4x", schid.sch_no); + return qdio_setup_get_ssqd(NULL, &schid, data); } EXPORT_SYMBOL_GPL(qdio_get_ssqd_desc); @@ -1154,6 +1158,7 @@ static void qdio_shutdown_queues(struct ccw_device *cdev) int qdio_shutdown(struct ccw_device *cdev, int how) { struct qdio_irq *irq_ptr = cdev->private->qdio_data; + struct subchannel_id schid; int rc; unsigned long flags; @@ -1161,7 +1166,8 @@ int qdio_shutdown(struct ccw_device *cdev, int how) return -ENODEV; WARN_ON_ONCE(irqs_disabled()); - DBF_EVENT("qshutdown:%4x", cdev->private->schid.sch_no); + ccw_device_get_schid(cdev, &schid); + DBF_EVENT("qshutdown:%4x", schid.sch_no); mutex_lock(&irq_ptr->setup_mutex); /* @@ -1228,11 +1234,13 @@ EXPORT_SYMBOL_GPL(qdio_shutdown); int qdio_free(struct ccw_device *cdev) { struct qdio_irq *irq_ptr = cdev->private->qdio_data; + struct subchannel_id schid; if (!irq_ptr) return -ENODEV; - DBF_EVENT("qfree:%4x", cdev->private->schid.sch_no); + ccw_device_get_schid(cdev, &schid); + DBF_EVENT("qfree:%4x", schid.sch_no); DBF_DEV_EVENT(DBF_ERR, irq_ptr, "dbf abandoned"); mutex_lock(&irq_ptr->setup_mutex); @@ -1251,9 +1259,11 @@ EXPORT_SYMBOL_GPL(qdio_free); */ int qdio_allocate(struct qdio_initialize *init_data) { + struct subchannel_id schid; struct qdio_irq *irq_ptr; - DBF_EVENT("qallocate:%4x", init_data->cdev->private->schid.sch_no); + ccw_device_get_schid(init_data->cdev, &schid); + DBF_EVENT("qallocate:%4x", schid.sch_no); if ((init_data->no_input_qs && !init_data->input_handler) || (init_data->no_output_qs && !init_data->output_handler)) @@ -1331,12 +1341,14 @@ static void qdio_detect_hsicq(struct qdio_irq *irq_ptr) */ int qdio_establish(struct qdio_initialize *init_data) { - struct qdio_irq *irq_ptr; struct ccw_device *cdev = init_data->cdev; + struct subchannel_id schid; + struct qdio_irq *irq_ptr; unsigned long saveflags; int rc; - DBF_EVENT("qestablish:%4x", cdev->private->schid.sch_no); + ccw_device_get_schid(cdev, &schid); + DBF_EVENT("qestablish:%4x", schid.sch_no); irq_ptr = cdev->private->qdio_data; if (!irq_ptr) @@ -1407,11 +1419,13 @@ EXPORT_SYMBOL_GPL(qdio_establish); */ int qdio_activate(struct ccw_device *cdev) { + struct subchannel_id schid; struct qdio_irq *irq_ptr; int rc; unsigned long saveflags; - DBF_EVENT("qactivate:%4x", cdev->private->schid.sch_no); + ccw_device_get_schid(cdev, &schid); + DBF_EVENT("qactivate:%4x", schid.sch_no); irq_ptr = cdev->private->qdio_data; if (!irq_ptr) -- cgit v0.10.2 From 1ab50a99f8cc6f4d761421d55a4044ad7f185cb5 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 29 Jul 2016 10:43:53 +0200 Subject: s390/cio: remove subchannel_id from ccw_device_private A copy of struct subchannel_id is maintained in ccw_device_private. The subchannel id is a property of the subchannel. The additional copy is not needed. Internal users can obtain it from subchannel.schid - device drivers can use ccw_device_get_schid(). Signed-off-by: Sebastian Ott Reviewed-by: Cornelia Huck Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 7ada078..6a58bc8 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -762,7 +762,6 @@ static int io_subchannel_initialize_dev(struct subchannel *sch, priv->state = DEV_STATE_NOT_OPER; priv->dev_id.devno = sch->schib.pmcw.dev; priv->dev_id.ssid = sch->schid.ssid; - priv->schid = sch->schid; INIT_WORK(&priv->todo_work, ccw_device_todo); INIT_LIST_HEAD(&priv->cmb_list); @@ -1000,7 +999,6 @@ static int ccw_device_move_to_sch(struct ccw_device *cdev, put_device(&old_sch->dev); /* Initialize new subchannel. */ spin_lock_irq(sch->lock); - cdev->private->schid = sch->schid; cdev->ccwlock = sch->lock; if (!sch_is_pseudo_sch(sch)) sch_set_cdev(sch, cdev); diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h index 8975060..220f491 100644 --- a/drivers/s390/cio/io_sch.h +++ b/drivers/s390/cio/io_sch.h @@ -120,7 +120,6 @@ struct ccw_device_private { int state; /* device state */ atomic_t onoff; struct ccw_dev_id dev_id; /* device id */ - struct subchannel_id schid; /* subchannel number */ struct ccw_request req; /* internal I/O request */ int iretry; u8 pgid_valid_mask; /* mask of valid PGIDs */ -- cgit v0.10.2 From a48ed867153c6d2f6d058267213a82dbd8b6737a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 29 Jul 2016 13:41:20 +0200 Subject: s390/qdio: get rid of spin_lock_irqsave usage All qdio functions that use spin_lock_irqsave are never used from irq context. Thus it is safe to convert all of them to use spin_lock_irq. Signed-off-by: Sebastian Ott Reviewed-by: Ursula Braun Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 94e9620..b200f27 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1160,7 +1160,6 @@ int qdio_shutdown(struct ccw_device *cdev, int how) struct qdio_irq *irq_ptr = cdev->private->qdio_data; struct subchannel_id schid; int rc; - unsigned long flags; if (!irq_ptr) return -ENODEV; @@ -1190,7 +1189,7 @@ int qdio_shutdown(struct ccw_device *cdev, int how) qdio_shutdown_debug_entries(irq_ptr); /* cleanup subchannel */ - spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + spin_lock_irq(get_ccwdev_lock(cdev)); if (how & QDIO_FLAG_CLEANUP_USING_CLEAR) rc = ccw_device_clear(cdev, QDIO_DOING_CLEANUP); @@ -1204,12 +1203,12 @@ int qdio_shutdown(struct ccw_device *cdev, int how) } qdio_set_state(irq_ptr, QDIO_IRQ_STATE_CLEANUP); - spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); + spin_unlock_irq(get_ccwdev_lock(cdev)); wait_event_interruptible_timeout(cdev->private->wait_q, irq_ptr->state == QDIO_IRQ_STATE_INACTIVE || irq_ptr->state == QDIO_IRQ_STATE_ERR, 10 * HZ); - spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + spin_lock_irq(get_ccwdev_lock(cdev)); no_cleanup: qdio_shutdown_thinint(irq_ptr); @@ -1217,7 +1216,7 @@ no_cleanup: /* restore interrupt handler */ if ((void *)cdev->handler == (void *)qdio_int_handler) cdev->handler = irq_ptr->orig_handler; - spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); + spin_unlock_irq(get_ccwdev_lock(cdev)); qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); mutex_unlock(&irq_ptr->setup_mutex); @@ -1344,7 +1343,6 @@ int qdio_establish(struct qdio_initialize *init_data) struct ccw_device *cdev = init_data->cdev; struct subchannel_id schid; struct qdio_irq *irq_ptr; - unsigned long saveflags; int rc; ccw_device_get_schid(cdev, &schid); @@ -1373,7 +1371,7 @@ int qdio_establish(struct qdio_initialize *init_data) irq_ptr->ccw.count = irq_ptr->equeue.count; irq_ptr->ccw.cda = (u32)((addr_t)irq_ptr->qdr); - spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags); + spin_lock_irq(get_ccwdev_lock(cdev)); ccw_device_set_options_mask(cdev, 0); rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ESTABLISH, 0, 0); @@ -1381,7 +1379,7 @@ int qdio_establish(struct qdio_initialize *init_data) DBF_ERROR("%4x est IO ERR", irq_ptr->schid.sch_no); DBF_ERROR("rc:%4x", rc); } - spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags); + spin_unlock_irq(get_ccwdev_lock(cdev)); if (rc) { mutex_unlock(&irq_ptr->setup_mutex); @@ -1422,7 +1420,6 @@ int qdio_activate(struct ccw_device *cdev) struct subchannel_id schid; struct qdio_irq *irq_ptr; int rc; - unsigned long saveflags; ccw_device_get_schid(cdev, &schid); DBF_EVENT("qactivate:%4x", schid.sch_no); @@ -1445,7 +1442,7 @@ int qdio_activate(struct ccw_device *cdev) irq_ptr->ccw.count = irq_ptr->aqueue.count; irq_ptr->ccw.cda = 0; - spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags); + spin_lock_irq(get_ccwdev_lock(cdev)); ccw_device_set_options(cdev, CCWDEV_REPORT_ALL); rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ACTIVATE, @@ -1454,7 +1451,7 @@ int qdio_activate(struct ccw_device *cdev) DBF_ERROR("%4x act IO ERR", irq_ptr->schid.sch_no); DBF_ERROR("rc:%4x", rc); } - spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags); + spin_unlock_irq(get_ccwdev_lock(cdev)); if (rc) goto out; -- cgit v0.10.2 From ddebf6612c5dcc479725867b9fd9a4d98f41350f Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 29 Jul 2016 14:00:27 +0200 Subject: s390/qdio: fix double return code evaluation qdio sometimes checks return codes twice. First with the ccw device's lock held and then a 2nd time after the lock is released. Simplify the code by releasing the lock earlier and unify the return code evaluation. Signed-off-by: Sebastian Ott Reviewed-by: Ursula Braun Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index b200f27..da96978 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1375,13 +1375,10 @@ int qdio_establish(struct qdio_initialize *init_data) ccw_device_set_options_mask(cdev, 0); rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ESTABLISH, 0, 0); + spin_unlock_irq(get_ccwdev_lock(cdev)); if (rc) { DBF_ERROR("%4x est IO ERR", irq_ptr->schid.sch_no); DBF_ERROR("rc:%4x", rc); - } - spin_unlock_irq(get_ccwdev_lock(cdev)); - - if (rc) { mutex_unlock(&irq_ptr->setup_mutex); qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); return rc; @@ -1447,14 +1444,12 @@ int qdio_activate(struct ccw_device *cdev) rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ACTIVATE, 0, DOIO_DENY_PREFETCH); + spin_unlock_irq(get_ccwdev_lock(cdev)); if (rc) { DBF_ERROR("%4x act IO ERR", irq_ptr->schid.sch_no); DBF_ERROR("rc:%4x", rc); - } - spin_unlock_irq(get_ccwdev_lock(cdev)); - - if (rc) goto out; + } if (is_thinint_irq(irq_ptr)) tiqdio_add_input_queues(irq_ptr); -- cgit v0.10.2 From 6e30c549f6cab7a41d0934cea80822a2211132ae Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 2 Aug 2016 16:01:39 +0200 Subject: s390/qdio: remove checks for ccw device internal state Prior to starting IO qdio checks for the internal state of the ccw device. These checks happen without locking, so consistency between state evaluation and starting of the IO is not guaranteed. Since the internal state is checked during ccw_device_start it is safe to get rid of these additional checks. Signed-off-by: Sebastian Ott Reviewed-by: Cornelia Huck Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index da96978..fda3b26 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1352,9 +1352,6 @@ int qdio_establish(struct qdio_initialize *init_data) if (!irq_ptr) return -ENODEV; - if (cdev->private->state != DEV_STATE_ONLINE) - return -EINVAL; - mutex_lock(&irq_ptr->setup_mutex); qdio_setup_irq(init_data); @@ -1425,9 +1422,6 @@ int qdio_activate(struct ccw_device *cdev) if (!irq_ptr) return -ENODEV; - if (cdev->private->state != DEV_STATE_ONLINE) - return -EINVAL; - mutex_lock(&irq_ptr->setup_mutex); if (irq_ptr->state == QDIO_IRQ_STATE_INACTIVE) { rc = -EBUSY; -- cgit v0.10.2 From 9bce8b2cbebf9f11b41021ccb98c6b18d1796edd Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Fri, 5 Aug 2016 12:33:10 +0200 Subject: s390/qdio: avoid reschedule of outbound tasklet once killed During qdio_shutdown the queue tasklets are killed for all inbound and outbound queues. The queue structures might be freed after qdio_shutdown. Thus it must be guaranteed that these queue tasklets are not rescheduled after that. In addition the outbound queue timers are deleted and it must be guaranteed that these timers are not restarted after qdio_shutdown processing. Timer deletion should make use of del_timer_sync() to make sure qdio_outbound_timer() is finished on other CPUs as well. Queue tasklets should be scheduled in state QDIO_IRQ_STATE_ACTIVE only. Signed-off-by: Ursula Braun Reviewed-by: Benjamin Block Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index fda3b26..71bf9bd 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -686,6 +686,15 @@ static void qdio_kick_handler(struct qdio_q *q) q->qdio_error = 0; } +static inline int qdio_tasklet_schedule(struct qdio_q *q) +{ + if (likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE)) { + tasklet_schedule(&q->tasklet); + return 0; + } + return -EPERM; +} + static void __qdio_inbound_processing(struct qdio_q *q) { qperf_inc(q, tasklet_inbound); @@ -698,10 +707,8 @@ static void __qdio_inbound_processing(struct qdio_q *q) if (!qdio_inbound_q_done(q)) { /* means poll time is not yet over */ qperf_inc(q, tasklet_inbound_resched); - if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) { - tasklet_schedule(&q->tasklet); + if (!qdio_tasklet_schedule(q)) return; - } } qdio_stop_polling(q); @@ -711,8 +718,7 @@ static void __qdio_inbound_processing(struct qdio_q *q) */ if (!qdio_inbound_q_done(q)) { qperf_inc(q, tasklet_inbound_resched2); - if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); } } @@ -869,16 +875,15 @@ static void __qdio_outbound_processing(struct qdio_q *q) * is noticed and outbound_handler is called after some time. */ if (qdio_outbound_q_done(q)) - del_timer(&q->u.out.timer); + del_timer_sync(&q->u.out.timer); else - if (!timer_pending(&q->u.out.timer)) + if (!timer_pending(&q->u.out.timer) && + likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE)) mod_timer(&q->u.out.timer, jiffies + 10 * HZ); return; sched: - if (unlikely(q->irq_ptr->state == QDIO_IRQ_STATE_STOPPED)) - return; - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); } /* outbound tasklet */ @@ -892,9 +897,7 @@ void qdio_outbound_timer(unsigned long data) { struct qdio_q *q = (struct qdio_q *)data; - if (unlikely(q->irq_ptr->state == QDIO_IRQ_STATE_STOPPED)) - return; - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); } static inline void qdio_check_outbound_after_thinint(struct qdio_q *q) @@ -907,7 +910,7 @@ static inline void qdio_check_outbound_after_thinint(struct qdio_q *q) for_each_output_queue(q->irq_ptr, out, i) if (!qdio_outbound_q_done(out)) - tasklet_schedule(&out->tasklet); + qdio_tasklet_schedule(out); } static void __tiqdio_inbound_processing(struct qdio_q *q) @@ -929,10 +932,8 @@ static void __tiqdio_inbound_processing(struct qdio_q *q) if (!qdio_inbound_q_done(q)) { qperf_inc(q, tasklet_inbound_resched); - if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) { - tasklet_schedule(&q->tasklet); + if (!qdio_tasklet_schedule(q)) return; - } } qdio_stop_polling(q); @@ -942,8 +943,7 @@ static void __tiqdio_inbound_processing(struct qdio_q *q) */ if (!qdio_inbound_q_done(q)) { qperf_inc(q, tasklet_inbound_resched2); - if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); } } @@ -977,7 +977,7 @@ static void qdio_int_handler_pci(struct qdio_irq *irq_ptr) int i; struct qdio_q *q; - if (unlikely(irq_ptr->state == QDIO_IRQ_STATE_STOPPED)) + if (unlikely(irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)) return; for_each_input_queue(irq_ptr, q, i) { @@ -1003,7 +1003,7 @@ static void qdio_int_handler_pci(struct qdio_irq *irq_ptr) continue; if (need_siga_sync(q) && need_siga_sync_out_after_pci(q)) qdio_siga_sync_q(q); - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); } } @@ -1145,7 +1145,7 @@ static void qdio_shutdown_queues(struct ccw_device *cdev) tasklet_kill(&q->tasklet); for_each_output_queue(irq_ptr, q, i) { - del_timer(&q->u.out.timer); + del_timer_sync(&q->u.out.timer); tasklet_kill(&q->tasklet); } } @@ -1585,10 +1585,11 @@ static int handle_outbound(struct qdio_q *q, unsigned int callflags, /* in case of SIGA errors we must process the error immediately */ if (used >= q->u.out.scan_threshold || rc) - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); else /* free the SBALs in case of no further traffic */ - if (!timer_pending(&q->u.out.timer)) + if (!timer_pending(&q->u.out.timer) && + likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE)) mod_timer(&q->u.out.timer, jiffies + HZ); return rc; } -- cgit v0.10.2 From f743e70e8f8f3200dde1dcf50c14aa2900d65071 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 3 Aug 2016 19:37:03 +0200 Subject: crypto: sha3 - Add missing ULL suffixes for 64-bit constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32-bit (e.g. with m68k-linux-gnu-gcc-4.1): crypto/sha3_generic.c:27: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:28: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:29: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:29: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:31: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:31: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:32: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:32: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:32: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:33: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:33: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:34: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:34: warning: integer constant is too large for ‘long’ type Fixes: 53964b9ee63b7075 ("crypto: sha3 - Add SHA-3 hash algorithm") Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c index 6226439..7e8ed96 100644 --- a/crypto/sha3_generic.c +++ b/crypto/sha3_generic.c @@ -24,14 +24,14 @@ #define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) static const u64 keccakf_rndc[24] = { - 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, - 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, - 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, - 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, - 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, - 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, - 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, - 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 + 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL, + 0x8000000080008000ULL, 0x000000000000808bULL, 0x0000000080000001ULL, + 0x8000000080008081ULL, 0x8000000000008009ULL, 0x000000000000008aULL, + 0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL, + 0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, + 0x8000000000008003ULL, 0x8000000000008002ULL, 0x8000000000000080ULL, + 0x000000000000800aULL, 0x800000008000000aULL, 0x8000000080008081ULL, + 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL }; static const int keccakf_rotc[24] = { -- cgit v0.10.2 From 1d2d87e81ea21f64c19b95ef228b865a6880e17e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Thu, 4 Aug 2016 20:02:46 +0300 Subject: crypto: caam - fix echainiv(authenc) encrypt shared descriptor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a few things missed by the conversion to the new AEAD interface: 1 - echainiv(authenc) encrypt shared descriptor The shared descriptor is incorrect: due to the order of operations, at some point in time MATH3 register is being overwritten. 2 - buffer used for echainiv(authenc) encrypt shared descriptor Encrypt and givencrypt shared descriptors (for AEAD ops) are mutually exclusive and thus use the same buffer in context state: sh_desc_enc. However, there's one place missed by s/sh_desc_givenc/sh_desc_enc, leading to errors when echainiv(authenc(...)) algorithms are used: DECO: desc idx 14: Header Error. Invalid length or parity, or certain other problems. While here, also fix a typo: dma_mapping_error() is checking for validity of sh_desc_givenc_dma instead of sh_desc_enc_dma. Cc: # 4.3+ Fixes: 479bcc7c5b9e ("crypto: caam - Convert authenc to new AEAD interface") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index ea8189f..e356005 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -614,7 +614,7 @@ skip_enc: keys_fit_inline = true; /* aead_givencrypt shared descriptor */ - desc = ctx->sh_desc_givenc; + desc = ctx->sh_desc_enc; /* Note: Context registers are saved. */ init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686); @@ -645,13 +645,13 @@ copy_iv: append_operation(desc, ctx->class2_alg_type | OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - /* ivsize + cryptlen = seqoutlen - authsize */ - append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); - /* Read and write assoclen bytes */ append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + /* ivsize + cryptlen = seqoutlen - authsize */ + append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); + /* Skip assoc data */ append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); @@ -697,7 +697,7 @@ copy_iv: ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) { + if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -- cgit v0.10.2 From 2fdea258fde036a87d3396ec9c0ef66f10768530 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Thu, 4 Aug 2016 20:02:47 +0300 Subject: crypto: caam - defer aead_set_sh_desc in case of zero authsize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be able to generate shared descriptors for AEAD, the authentication size needs to be known. However, there is no imposed order of calling .setkey, .setauthsize callbacks. Thus, in case authentication size is not known at .setkey time, defer it until .setauthsize is called. The authsize != 0 check was incorrectly removed when converting the driver to the new AEAD interface. Cc: # 4.3+ Fixes: 479bcc7c5b9e ("crypto: caam - Convert authenc to new AEAD interface") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index e356005..6dc5971 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -441,6 +441,9 @@ static int aead_set_sh_desc(struct crypto_aead *aead) OP_ALG_AAI_CTR_MOD128); const bool is_rfc3686 = alg->caam.rfc3686; + if (!ctx->authsize) + return 0; + /* NULL encryption / decryption */ if (!ctx->enckeylen) return aead_null_set_sh_desc(aead); -- cgit v0.10.2 From dca3f53c02e325bb19dfc05b1571b9a706226fea Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 4 Aug 2016 12:11:55 +0200 Subject: sctp: Export struct sctp_info to userspace This is required to correctly interpret INET_DIAG_INFO messages exported by sctp_diag module. Signed-off-by: Phil Sutter Signed-off-by: David S. Miller diff --git a/include/linux/sctp.h b/include/linux/sctp.h index de1f643..fcb4c36 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -705,70 +705,6 @@ typedef struct sctp_auth_chunk { sctp_authhdr_t auth_hdr; } __packed sctp_auth_chunk_t; -struct sctp_info { - __u32 sctpi_tag; - __u32 sctpi_state; - __u32 sctpi_rwnd; - __u16 sctpi_unackdata; - __u16 sctpi_penddata; - __u16 sctpi_instrms; - __u16 sctpi_outstrms; - __u32 sctpi_fragmentation_point; - __u32 sctpi_inqueue; - __u32 sctpi_outqueue; - __u32 sctpi_overall_error; - __u32 sctpi_max_burst; - __u32 sctpi_maxseg; - __u32 sctpi_peer_rwnd; - __u32 sctpi_peer_tag; - __u8 sctpi_peer_capable; - __u8 sctpi_peer_sack; - __u16 __reserved1; - - /* assoc status info */ - __u64 sctpi_isacks; - __u64 sctpi_osacks; - __u64 sctpi_opackets; - __u64 sctpi_ipackets; - __u64 sctpi_rtxchunks; - __u64 sctpi_outofseqtsns; - __u64 sctpi_idupchunks; - __u64 sctpi_gapcnt; - __u64 sctpi_ouodchunks; - __u64 sctpi_iuodchunks; - __u64 sctpi_oodchunks; - __u64 sctpi_iodchunks; - __u64 sctpi_octrlchunks; - __u64 sctpi_ictrlchunks; - - /* primary transport info */ - struct sockaddr_storage sctpi_p_address; - __s32 sctpi_p_state; - __u32 sctpi_p_cwnd; - __u32 sctpi_p_srtt; - __u32 sctpi_p_rto; - __u32 sctpi_p_hbinterval; - __u32 sctpi_p_pathmaxrxt; - __u32 sctpi_p_sackdelay; - __u32 sctpi_p_sackfreq; - __u32 sctpi_p_ssthresh; - __u32 sctpi_p_partial_bytes_acked; - __u32 sctpi_p_flight_size; - __u16 sctpi_p_error; - __u16 __reserved2; - - /* sctp sock info */ - __u32 sctpi_s_autoclose; - __u32 sctpi_s_adaptation_ind; - __u32 sctpi_s_pd_point; - __u8 sctpi_s_nodelay; - __u8 sctpi_s_disable_fragments; - __u8 sctpi_s_v4mapped; - __u8 sctpi_s_frag_interleave; - __u32 sctpi_s_type; - __u32 __reserved3; -}; - struct sctp_infox { struct sctp_info *sctpinfo; struct sctp_association *asoc; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index d304f4c..a406adc 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -944,4 +944,68 @@ struct sctp_default_prinfo { __u16 pr_policy; }; +struct sctp_info { + __u32 sctpi_tag; + __u32 sctpi_state; + __u32 sctpi_rwnd; + __u16 sctpi_unackdata; + __u16 sctpi_penddata; + __u16 sctpi_instrms; + __u16 sctpi_outstrms; + __u32 sctpi_fragmentation_point; + __u32 sctpi_inqueue; + __u32 sctpi_outqueue; + __u32 sctpi_overall_error; + __u32 sctpi_max_burst; + __u32 sctpi_maxseg; + __u32 sctpi_peer_rwnd; + __u32 sctpi_peer_tag; + __u8 sctpi_peer_capable; + __u8 sctpi_peer_sack; + __u16 __reserved1; + + /* assoc status info */ + __u64 sctpi_isacks; + __u64 sctpi_osacks; + __u64 sctpi_opackets; + __u64 sctpi_ipackets; + __u64 sctpi_rtxchunks; + __u64 sctpi_outofseqtsns; + __u64 sctpi_idupchunks; + __u64 sctpi_gapcnt; + __u64 sctpi_ouodchunks; + __u64 sctpi_iuodchunks; + __u64 sctpi_oodchunks; + __u64 sctpi_iodchunks; + __u64 sctpi_octrlchunks; + __u64 sctpi_ictrlchunks; + + /* primary transport info */ + struct sockaddr_storage sctpi_p_address; + __s32 sctpi_p_state; + __u32 sctpi_p_cwnd; + __u32 sctpi_p_srtt; + __u32 sctpi_p_rto; + __u32 sctpi_p_hbinterval; + __u32 sctpi_p_pathmaxrxt; + __u32 sctpi_p_sackdelay; + __u32 sctpi_p_sackfreq; + __u32 sctpi_p_ssthresh; + __u32 sctpi_p_partial_bytes_acked; + __u32 sctpi_p_flight_size; + __u16 sctpi_p_error; + __u16 __reserved2; + + /* sctp sock info */ + __u32 sctpi_s_autoclose; + __u32 sctpi_s_adaptation_ind; + __u32 sctpi_s_pd_point; + __u8 sctpi_s_nodelay; + __u8 sctpi_s_disable_fragments; + __u8 sctpi_s_v4mapped; + __u8 sctpi_s_frag_interleave; + __u32 sctpi_s_type; + __u32 __reserved3; +}; + #endif /* _UAPI_SCTP_H */ -- cgit v0.10.2 From 12474e8e58d82e2b815cd35956c0c06fab104ee7 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 4 Aug 2016 12:11:56 +0200 Subject: sctp_diag: Fix T3_rtx timer export The asoc's timer value is not kept in asoc->timeouts array but in it's primary transport instead. Furthermore, we must export the timer only if it is pending, otherwise the value will underrun when stored in an unsigned variable and user space will only see a very large timeout value. Signed-off-by: Phil Sutter Signed-off-by: David S. Miller diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index f69edcf..f728ef0 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -13,6 +13,7 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r, { union sctp_addr laddr, paddr; struct dst_entry *dst; + struct timer_list *t3_rtx = &asoc->peer.primary_path->T3_rtx_timer; laddr = list_entry(asoc->base.bind_addr.address_list.next, struct sctp_sockaddr_entry, list)->a; @@ -40,10 +41,15 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r, } r->idiag_state = asoc->state; - r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX; - r->idiag_retrans = asoc->rtx_data_chunks; - r->idiag_expires = jiffies_to_msecs( - asoc->timeouts[SCTP_EVENT_TIMEOUT_T3_RTX] - jiffies); + if (timer_pending(t3_rtx)) { + r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX; + r->idiag_retrans = asoc->rtx_data_chunks; + r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies); + } else { + r->idiag_timer = 0; + r->idiag_retrans = 0; + r->idiag_expires = 0; + } } static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, -- cgit v0.10.2 From 1ba8d77f410d3b7423df39e8a10a02af272d3c42 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 4 Aug 2016 12:11:57 +0200 Subject: sctp_diag: Respect ss adding TCPF_CLOSE to idiag_states Since 'ss' always adds TCPF_CLOSE to idiag_states flags, sctp_diag can't rely upon TCPF_LISTEN flag solely being present when listening sockets are requested. Signed-off-by: Phil Sutter Signed-off-by: David S. Miller diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index f728ef0..bb69153 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -356,7 +356,7 @@ static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) if (cb->args[4] < cb->args[1]) goto next; - if ((r->idiag_states & ~TCPF_LISTEN) && !list_empty(&ep->asocs)) + if (!(r->idiag_states & TCPF_LISTEN) && !list_empty(&ep->asocs)) goto next; if (r->sdiag_family != AF_UNSPEC && @@ -471,7 +471,7 @@ skip: * 3 : to mark if we have dumped the ep info of the current asoc * 4 : to work as a temporary variable to traversal list */ - if (!(idiag_states & ~TCPF_LISTEN)) + if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp); done: -- cgit v0.10.2 From 3b3bf80b994f0c6c35a25ef8965ab956b4bcced5 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 4 Aug 2016 12:37:17 +0200 Subject: rhashtable-test: Fix max_size parameter description Looks like a simple copy'n'paste error. Fixes: 1aa661f5c3df1 ("rhashtable-test: Measure time to insert, remove & traverse entries") Signed-off-by: Phil Sutter Signed-off-by: David S. Miller diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 297fdb5..64e899b 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -38,7 +38,7 @@ MODULE_PARM_DESC(runs, "Number of test runs per variant (default: 4)"); static int max_size = 0; module_param(max_size, int, 0); -MODULE_PARM_DESC(runs, "Maximum table size (default: calculated)"); +MODULE_PARM_DESC(max_size, "Maximum table size (default: calculated)"); static bool shrinking = false; module_param(shrinking, bool, 0); -- cgit v0.10.2 From b489a2000f19e414710d2887fe3e24e903242766 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 4 Aug 2016 17:36:20 +0300 Subject: mlxsw: spectrum: Do not assume PAUSE frames are disabled When ieee_setpfc() gets called, PAUSE frames are not necessarily disabled on the port. Check if PAUSE frames are disabled or enabled and configure the port's headroom buffer accordingly. Fixes: d81a6bdb87ce ("mlxsw: spectrum: Add IEEE 802.1Qbb PFC support") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index 01cfb75..3c4a178 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -351,17 +351,17 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); int err; - if ((mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) && - pfc->pfc_en) { + if (pause_en && pfc->pfc_en) { netdev_err(dev, "PAUSE frames already enabled on port\n"); return -EINVAL; } err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, mlxsw_sp_port->dcb.ets->prio_tc, - false, pfc); + pause_en, pfc); if (err) { netdev_err(dev, "Failed to configure port's headroom for PFC\n"); return err; @@ -380,7 +380,7 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, err_port_pfc_set: __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, - mlxsw_sp_port->dcb.ets->prio_tc, false, + mlxsw_sp_port->dcb.ets->prio_tc, pause_en, mlxsw_sp_port->dcb.pfc); return err; } -- cgit v0.10.2 From 07d50cae0661e5479d54d6e3e21cad15b1198103 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 4 Aug 2016 17:36:21 +0300 Subject: mlxsw: spectrum: Do not override PAUSE settings The PFCC register is used to configure both PAUSE and PFC frames. Therefore, when PFC frames are disabled we must make sure we don't mistakenly also disable PAUSE frames (which might be enabled). Fix this by packing the PFCC register with the current PAUSE settings. Note that this register is also accessed via ethtool ops, but there we are guaranteed to have PFC disabled. Fixes: d81a6bdb87ce ("mlxsw: spectrum: Add IEEE 802.1Qbb PFC support") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index 3c4a178..b6ed7f7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -341,6 +341,8 @@ static int mlxsw_sp_port_pfc_set(struct mlxsw_sp_port *mlxsw_sp_port, char pfcc_pl[MLXSW_REG_PFCC_LEN]; mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port); + mlxsw_reg_pfcc_pprx_set(pfcc_pl, mlxsw_sp_port->link.rx_pause); + mlxsw_reg_pfcc_pptx_set(pfcc_pl, mlxsw_sp_port->link.tx_pause); mlxsw_reg_pfcc_prio_pack(pfcc_pl, pfc->pfc_en); return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc), -- cgit v0.10.2 From 4de34eb5743f720dc4798f0647f75c21d44aa1f8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 4 Aug 2016 17:36:22 +0300 Subject: mlxsw: spectrum: Add missing DCB rollback in error path We correctly execute mlxsw_sp_port_dcb_fini() when port is removed, but I missed its rollback in the error path of port creation, so add it. Fixes: f00817df2b42 ("mlxsw: spectrum: Introduce support for Data Center Bridging (DCB)") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index c3e6150..e1b8f62 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2220,6 +2220,7 @@ err_port_vlan_init: err_core_port_init: unregister_netdev(dev); err_register_netdev: + mlxsw_sp_port_dcb_fini(mlxsw_sp_port); err_port_dcb_init: err_port_ets_init: err_port_buffers_init: -- cgit v0.10.2 From 66cf3504f4c528793b3d8986bab606f7cfb1c4bb Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 4 Aug 2016 16:07:58 -0400 Subject: net/ethernet: tundra: fix dump_eth_one warning in tsi108_eth The call site for this function appears as: #ifdef DEBUG data->msg_enable = DEBUG; dump_eth_one(dev); #endif ...leading to the following warning for !DEBUG builds: drivers/net/ethernet/tundra/tsi108_eth.c:169:13: warning: 'dump_eth_one' defined but not used [-Wunused-function] static void dump_eth_one(struct net_device *dev) ^ ...when using the arch/powerpc/configs/mpc7448_hpc2_defconfig Put the function definition under the same #ifdef as the call site to avoid the warning. Cc: "David S. Miller" Cc: netdev@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index 01a7714..8fd1312 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -166,6 +166,7 @@ static struct platform_driver tsi_eth_driver = { static void tsi108_timed_checker(unsigned long dev_ptr); +#ifdef DEBUG static void dump_eth_one(struct net_device *dev) { struct tsi108_prv_data *data = netdev_priv(dev); @@ -190,6 +191,7 @@ static void dump_eth_one(struct net_device *dev) TSI_READ(TSI108_EC_RXESTAT), TSI_READ(TSI108_EC_RXERR), data->rxpending); } +#endif /* Synchronization is needed between the thread and up/down events. * Note that the PHY is accessed through the same registers for both -- cgit v0.10.2 From a2bfe6bf09a5f38df2bd0b1734f5fdee76f9366f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 5 Aug 2016 00:11:11 +0200 Subject: bpf: also call skb_postpush_rcsum on xmit occasions Follow-up to commit f8ffad69c9f8 ("bpf: add skb_postpush_rcsum and fix dev_forward_skb occasions") to fix an issue for dev_queue_xmit() redirect locations which need CHECKSUM_COMPLETE fixups on ingress. For the same reasons as described in f8ffad69c9f8 already, we of course also need this here, since dev_queue_xmit() on a veth device will let us end up in the dev_forward_skb() helper again to cross namespaces. Latter then calls into skb_postpull_rcsum() to pull out L2 header, so that netif_rx_internal() sees CHECKSUM_COMPLETE as it is expected. That is, CHECKSUM_COMPLETE on ingress covering L2 _payload_, not L2 headers. Also here we have to address bpf_redirect() and bpf_clone_redirect(). Fixes: 3896d655f4d4 ("bpf: introduce bpf_clone_redirect() helper") Fixes: 27b29f63058d ("bpf: add bpf_redirect() helper") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller diff --git a/net/core/filter.c b/net/core/filter.c index 5708999..c46244f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1365,6 +1365,12 @@ static inline int bpf_try_make_writable(struct sk_buff *skb, return err; } +static inline void bpf_push_mac_rcsum(struct sk_buff *skb) +{ + if (skb_at_tc_ingress(skb)) + skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len); +} + static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); @@ -1607,9 +1613,6 @@ static const struct bpf_func_proto bpf_csum_diff_proto = { static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) { - if (skb_at_tc_ingress(skb)) - skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len); - return dev_forward_skb(dev, skb); } @@ -1648,6 +1651,8 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) if (unlikely(!skb)) return -ENOMEM; + bpf_push_mac_rcsum(skb); + return flags & BPF_F_INGRESS ? __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); } @@ -1693,6 +1698,8 @@ int skb_do_redirect(struct sk_buff *skb) return -EINVAL; } + bpf_push_mac_rcsum(skb); + return ri->flags & BPF_F_INGRESS ? __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); } -- cgit v0.10.2 From 479ffcccefd7b04442b0e949f8779b0612e8c75f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 5 Aug 2016 00:11:12 +0200 Subject: bpf: fix checksum fixups on bpf_skb_store_bytes bpf_skb_store_bytes() invocations above L2 header need BPF_F_RECOMPUTE_CSUM flag for updates, so that CHECKSUM_COMPLETE will be fixed up along the way. Where we ran into an issue with bpf_skb_store_bytes() is when we did a single-byte update on the IPv6 hoplimit despite using BPF_F_RECOMPUTE_CSUM flag; simple ping via ICMPv6 triggered a hw csum failure as a result. The underlying issue has been tracked down to a buffer alignment issue. Meaning, that csum_partial() computations via skb_postpull_rcsum() and skb_postpush_rcsum() pair invoked had a wrong result since they operated on an odd address for the hoplimit, while other computations were done on an even address. This mix doesn't work as-is with skb_postpull_rcsum(), skb_postpush_rcsum() pair as it always expects at least half-word alignment of input buffers, which is normally the case. Thus, instead of these helpers using csum_sub() and (implicitly) csum_add(), we need to use csum_block_sub(), csum_block_add(), respectively. For unaligned offsets, they rotate the sum to align it to a half-word boundary again, otherwise they work the same as csum_sub() and csum_add(). Adding __skb_postpull_rcsum(), __skb_postpush_rcsum() variants that take the offset as an input and adapting bpf_skb_store_bytes() to them fixes the hw csum failures again. The skb_postpull_rcsum(), skb_postpush_rcsum() helpers use a 0 constant for offset so that the compiler optimizes the offset & 1 test away and generates the same code as with csum_sub()/_add(). Fixes: 608cd71a9c7c ("tc: bpf: generalize pedit action") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f0b3e0..0f665cb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2847,6 +2847,18 @@ static inline int skb_linearize_cow(struct sk_buff *skb) __skb_linearize(skb) : 0; } +static __always_inline void +__skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len, + unsigned int off) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_block_sub(skb->csum, + csum_partial(start, len, 0), off); + else if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start_offset(skb) < 0) + skb->ip_summed = CHECKSUM_NONE; +} + /** * skb_postpull_rcsum - update checksum for received skb after pull * @skb: buffer to update @@ -2857,36 +2869,38 @@ static inline int skb_linearize_cow(struct sk_buff *skb) * update the CHECKSUM_COMPLETE checksum, or set ip_summed to * CHECKSUM_NONE so that it can be recomputed from scratch. */ - static inline void skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); - else if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_start_offset(skb) < 0) - skb->ip_summed = CHECKSUM_NONE; + __skb_postpull_rcsum(skb, start, len, 0); } -unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); +static __always_inline void +__skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len, + unsigned int off) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_block_add(skb->csum, + csum_partial(start, len, 0), off); +} +/** + * skb_postpush_rcsum - update checksum for received skb after push + * @skb: buffer to update + * @start: start of data after push + * @len: length of data pushed + * + * After doing a push on a received packet, you need to call this to + * update the CHECKSUM_COMPLETE checksum. + */ static inline void skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - /* For performing the reverse operation to skb_postpull_rcsum(), - * we can instead of ... - * - * skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); - * - * ... just use this equivalent version here to save a few - * instructions. Feeding csum of 0 in csum_partial() and later - * on adding skb->csum is equivalent to feed skb->csum in the - * first place. - */ - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_partial(start, len, skb->csum); + __skb_postpush_rcsum(skb, start, len, 0); } +unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); + /** * skb_push_rcsum - push skb and update receive checksum * @skb: buffer to update diff --git a/net/core/filter.c b/net/core/filter.c index c46244f..5ecd5c9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1401,7 +1401,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) return -EFAULT; if (flags & BPF_F_RECOMPUTE_CSUM) - skb_postpull_rcsum(skb, ptr, len); + __skb_postpull_rcsum(skb, ptr, len, offset); memcpy(ptr, from, len); @@ -1410,7 +1410,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) skb_store_bits(skb, offset, ptr, len); if (flags & BPF_F_RECOMPUTE_CSUM) - skb_postpush_rcsum(skb, ptr, len); + __skb_postpush_rcsum(skb, ptr, len, offset); if (flags & BPF_F_INVALIDATE_HASH) skb_clear_hash(skb); -- cgit v0.10.2 From 8065694e6519d234ccee93d952127e3f05b81ba5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 5 Aug 2016 00:11:13 +0200 Subject: bpf: fix checksum for vlan push/pop helper When having skbs on ingress with CHECKSUM_COMPLETE, tc BPF programs don't push rcsum of mac header back in and after BPF run back pull out again as opposed to some other subsystems (ovs, for example). For cases like q-in-q, meaning when a vlan tag for offloading is already present and we're about to push another one, then skb_vlan_push() pushes the inner one into the skb, increasing mac header and skb_postpush_rcsum()'ing the 4 bytes vlan header diff. Likewise, for the reverse operation in skb_vlan_pop() for the case where vlan header needs to be pulled out of the skb, we're decreasing the mac header and skb_postpull_rcsum()'ing the 4 bytes rcsum of the vlan header that was removed. However mangling the rcsum here will lead to hw csum failure for BPF case, since we're pulling or pushing data that was not part of the current rcsum. Changing tc BPF programs in general to push/pull rcsum around BPF_PROG_RUN() is also not really an option since current behaviour is ABI by now, but apart from that would also mean to do quite a bit of useless work in the sense that usually 12 bytes need to be rcsum pushed/pulled also when we don't need to touch this vlan related corner case. One way to fix it would be to push the necessary rcsum fixup down into vlan helpers that are (mostly) slow-path anyway. Fixes: 4e10df9a60d9 ("bpf: introduce bpf_skb_vlan_push/pop() helpers") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller diff --git a/net/core/filter.c b/net/core/filter.c index 5ecd5c9..b5add4e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1371,6 +1371,12 @@ static inline void bpf_push_mac_rcsum(struct sk_buff *skb) skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len); } +static inline void bpf_pull_mac_rcsum(struct sk_buff *skb) +{ + if (skb_at_tc_ingress(skb)) + skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len); +} + static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); @@ -1763,7 +1769,10 @@ static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) vlan_proto != htons(ETH_P_8021AD))) vlan_proto = htons(ETH_P_8021Q); + bpf_push_mac_rcsum(skb); ret = skb_vlan_push(skb, vlan_proto, vlan_tci); + bpf_pull_mac_rcsum(skb); + bpf_compute_data_end(skb); return ret; } @@ -1783,7 +1792,10 @@ static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) struct sk_buff *skb = (struct sk_buff *) (long) r1; int ret; + bpf_push_mac_rcsum(skb); ret = skb_vlan_pop(skb); + bpf_pull_mac_rcsum(skb); + bpf_compute_data_end(skb); return ret; } -- cgit v0.10.2 From 272d96a5ab10662691b4ec90c4a66fdbf30ea7ba Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Fri, 5 Aug 2016 17:45:36 -0700 Subject: net: vxlan: lwt: Use source ip address during route lookup. LWT user can specify destination as well as source ip address for given tunnel endpoint. But vxlan is ignoring given source ip address. Following patch uses both ip address to route the tunnel packet. This consistent with other LWT implementations, like GENEVE and GRE. Fixes: ee122c79d42 ("vxlan: Flow based tunneling"). Signed-off-by: Pravin B Shelar Acked-by: Jiri Benc Signed-off-by: David S. Miller diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index da4e3d6..b812234 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1811,7 +1811,7 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, fl4.flowi4_mark = skb->mark; fl4.flowi4_proto = IPPROTO_UDP; fl4.daddr = daddr; - fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr; + fl4.saddr = *saddr; rt = ip_route_output_key(vxlan->net, &fl4); if (!IS_ERR(rt)) { @@ -1847,7 +1847,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.daddr = *daddr; - fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr; + fl6.saddr = *saddr; fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = IPPROTO_UDP; @@ -1920,7 +1920,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct rtable *rt = NULL; const struct iphdr *old_iph; union vxlan_addr *dst; - union vxlan_addr remote_ip; + union vxlan_addr remote_ip, local_ip; + union vxlan_addr *src; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; __be16 src_port = 0, dst_port; @@ -1938,6 +1939,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; vni = rdst->remote_vni; dst = &rdst->remote_ip; + src = &vxlan->cfg.saddr; dst_cache = &rdst->dst_cache; } else { if (!info) { @@ -1948,11 +1950,15 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; vni = vxlan_tun_id_to_vni(info->key.tun_id); remote_ip.sa.sa_family = ip_tunnel_info_af(info); - if (remote_ip.sa.sa_family == AF_INET) + if (remote_ip.sa.sa_family == AF_INET) { remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst; - else + local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src; + } else { remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst; + local_ip.sin6.sin6_addr = info->key.u.ipv6.src; + } dst = &remote_ip; + src = &local_ip; dst_cache = &info->dst_cache; } @@ -1992,15 +1998,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } if (dst->sa.sa_family == AF_INET) { - __be32 saddr; - if (!vxlan->vn4_sock) goto drop; sk = vxlan->vn4_sock->sock->sk; rt = vxlan_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, - dst->sin.sin_addr.s_addr, &saddr, + dst->sin.sin_addr.s_addr, + &src->sin.sin_addr.s_addr, dst_cache, info); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", @@ -2043,13 +2048,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (err < 0) goto xmit_tx_error; - udp_tunnel_xmit_skb(rt, sk, skb, saddr, + udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr, dst->sin.sin_addr.s_addr, tos, ttl, df, src_port, dst_port, xnet, !udp_sum); #if IS_ENABLED(CONFIG_IPV6) } else { struct dst_entry *ndst; - struct in6_addr saddr; u32 rt6i_flags; if (!vxlan->vn6_sock) @@ -2058,7 +2062,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ndst = vxlan6_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, - label, &dst->sin6.sin6_addr, &saddr, + label, &dst->sin6.sin6_addr, + &src->sin6.sin6_addr, dst_cache, info); if (IS_ERR(ndst)) { netdev_dbg(dev, "no route to %pI6\n", @@ -2104,7 +2109,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, return; } udp_tunnel6_xmit_skb(ndst, sk, skb, dev, - &saddr, &dst->sin6.sin6_addr, tos, ttl, + &src->sin6.sin6_addr, + &dst->sin6.sin6_addr, tos, ttl, label, src_port, dst_port, !udp_sum); #endif } -- cgit v0.10.2 From bbec7802c6948c8626b71a4fe31283cb4691c358 Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Fri, 5 Aug 2016 17:45:37 -0700 Subject: net: vxlan: lwt: Fix vxlan local traffic. vxlan driver has bypass for local vxlan traffic, but that depends on information about all VNIs on local system in vxlan driver. This is not available in case of LWT. Therefore following patch disable encap bypass for LWT vxlan traffic. Fixes: ee122c79d42 ("vxlan: Flow based tunneling"). Reported-by: Jakub Libosvar Signed-off-by: Pravin B Shelar Acked-by: Jiri Benc Signed-off-by: David S. Miller diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index b812234..c0dda6f 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2022,7 +2022,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } /* Bypass encapsulation if the destination is local */ - if (rt->rt_flags & RTCF_LOCAL && + if (!info && rt->rt_flags & RTCF_LOCAL && !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { struct vxlan_dev *dst_vxlan; @@ -2082,7 +2082,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, /* Bypass encapsulation if the destination is local */ rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags; - if (rt6i_flags & RTF_LOCAL && + if (!info && rt6i_flags & RTF_LOCAL && !(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { struct vxlan_dev *dst_vxlan; -- cgit v0.10.2 From 1fe323aa1b2390a0c57fb0b06a782f128d49094c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 7 Aug 2016 14:15:13 +0800 Subject: sctp: use event->chunk when it's valid Commit 52253db924d1 ("sctp: also point GSO head_skb to the sk when it's available") used event->chunk->head_skb to get the head_skb in sctp_ulpevent_set_owner(). But at that moment, the event->chunk was NULL, as it cloned the skb in sctp_ulpevent_make_rcvmsg(). Therefore, that patch didn't really work. This patch is to move the event->chunk initialization before calling sctp_ulpevent_receive_data() so that it uses event->chunk when it's valid. Fixes: 52253db924d1 ("sctp: also point GSO head_skb to the sk when it's available") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 1bc4f71..d85b803 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -702,14 +702,14 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, */ sctp_ulpevent_init(event, 0, skb->len + sizeof(struct sk_buff)); - sctp_ulpevent_receive_data(event, asoc); - /* And hold the chunk as we need it for getting the IP headers * later in recvmsg */ sctp_chunk_hold(chunk); event->chunk = chunk; + sctp_ulpevent_receive_data(event, asoc); + event->stream = ntohs(chunk->subh.data_hdr->stream); event->ssn = ntohs(chunk->subh.data_hdr->ssn); event->ppid = chunk->subh.data_hdr->ppid; -- cgit v0.10.2 From fa00c437eef8dc2e7b25f8cd868cfa405fcc2bb3 Mon Sep 17 00:00:00 2001 From: Dave Carroll Date: Fri, 5 Aug 2016 13:44:10 -0600 Subject: aacraid: Check size values after double-fetch from user In aacraid's ioctl_send_fib() we do two fetches from userspace, one the get the fib header's size and one for the fib itself. Later we use the size field from the second fetch to further process the fib. If for some reason the size from the second fetch is different than from the first fix, we may encounter an out-of- bounds access in aac_fib_send(). We also check the sender size to insure it is not out of bounds. This was reported in https://bugzilla.kernel.org/show_bug.cgi?id=116751 and was assigned CVE-2016-6480. Reported-by: Pengfei Wang Fixes: 7c00ffa31 '[SCSI] 2.6 aacraid: Variable FIB size (updated patch)' Cc: stable@vger.kernel.org Signed-off-by: Dave Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c index b381b37..5648b71 100644 --- a/drivers/scsi/aacraid/commctrl.c +++ b/drivers/scsi/aacraid/commctrl.c @@ -63,7 +63,7 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg) struct fib *fibptr; struct hw_fib * hw_fib = (struct hw_fib *)0; dma_addr_t hw_fib_pa = (dma_addr_t)0LL; - unsigned size; + unsigned int size, osize; int retval; if (dev->in_reset) { @@ -87,7 +87,8 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg) * will not overrun the buffer when we copy the memory. Return * an error if we would. */ - size = le16_to_cpu(kfib->header.Size) + sizeof(struct aac_fibhdr); + osize = size = le16_to_cpu(kfib->header.Size) + + sizeof(struct aac_fibhdr); if (size < le16_to_cpu(kfib->header.SenderSize)) size = le16_to_cpu(kfib->header.SenderSize); if (size > dev->max_fib_size) { @@ -118,6 +119,14 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg) goto cleanup; } + /* Sanity check the second copy */ + if ((osize != le16_to_cpu(kfib->header.Size) + + sizeof(struct aac_fibhdr)) + || (size < le16_to_cpu(kfib->header.SenderSize))) { + retval = -EINVAL; + goto cleanup; + } + if (kfib->header.Command == cpu_to_le16(TakeABreakPt)) { aac_adapter_interrupt(dev); /* -- cgit v0.10.2 From c39b487f195b93235ee76384427467786f7bf29f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 9 Aug 2016 00:20:28 -0400 Subject: Revert "drm/amdgpu: work around lack of upstream ACPI support for D3cold" This reverts commit c63695cc5e5f685e924e25a8f9555f6e846f1fc6. Now that d3cold support is upstream, there is no more need for this workaround. bug: https://bugs.freedesktop.org/show_bug.cgi?id=97248 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 49de926..10b5ddf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -200,16 +200,7 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx) atpx->is_hybrid = false; if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) { printk("ATPX Hybrid Graphics\n"); -#if 1 - /* This is a temporary hack until the D3 cold support - * makes it upstream. The ATPX power_control method seems - * to still work on even if the system should be using - * the new standardized hybrid D3 cold ACPI interface. - */ - atpx->functions.power_cntl = true; -#else atpx->functions.power_cntl = false; -#endif atpx->is_hybrid = true; } -- cgit v0.10.2 From b817634276f7f68c9d1d6d4a27117ff3c2f16956 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 9 Aug 2016 00:21:45 -0400 Subject: Revert "drm/radeon: work around lack of upstream ACPI support for D3cold" This reverts commit bdfb76040068d960cb9e226876be8a508d741c4a. Now that d3cold is upstream, there is no more need for this workaround. diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 6de3428..ddef0d4 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -198,16 +198,7 @@ static int radeon_atpx_validate(struct radeon_atpx *atpx) atpx->is_hybrid = false; if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) { printk("ATPX Hybrid Graphics\n"); -#if 1 - /* This is a temporary hack until the D3 cold support - * makes it upstream. The ATPX power_control method seems - * to still work on even if the system should be using - * the new standardized hybrid D3 cold ACPI interface. - */ - atpx->functions.power_cntl = true; -#else atpx->functions.power_cntl = false; -#endif atpx->is_hybrid = true; } -- cgit v0.10.2 From c0c45a6bd7d054efd80c1033bf4285830c72835b Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Mon, 8 Aug 2016 21:57:40 -0400 Subject: qed: Remove the endian-ness conversion for pri_to_tc value. Endian-ness conversion is not needed for priority-to-TC field as the field is already being read/written by the driver in big-endian way. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index d0dc28f..6869330 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -483,7 +483,7 @@ qed_dcbx_get_ets_data(struct qed_hwfn *p_hwfn, bw_map[1] = be32_to_cpu(p_ets->tc_bw_tbl[1]); tsa_map[0] = be32_to_cpu(p_ets->tc_tsa_tbl[0]); tsa_map[1] = be32_to_cpu(p_ets->tc_tsa_tbl[1]); - pri_map = be32_to_cpu(p_ets->pri_tc_tbl[0]); + pri_map = p_ets->pri_tc_tbl[0]; for (i = 0; i < QED_MAX_PFC_PRIORITIES; i++) { p_params->ets_tc_bw_tbl[i] = ((u8 *)bw_map)[i]; p_params->ets_tc_tsa_tbl[i] = ((u8 *)tsa_map)[i]; @@ -944,7 +944,6 @@ qed_dcbx_set_ets_data(struct qed_hwfn *p_hwfn, val = (((u32)p_params->ets_pri_tc_tbl[i]) << ((7 - i) * 4)); p_ets->pri_tc_tbl[0] |= val; } - p_ets->pri_tc_tbl[0] = cpu_to_be32(p_ets->pri_tc_tbl[0]); for (i = 0; i < 2; i++) { p_ets->tc_bw_tbl[i] = cpu_to_be32(p_ets->tc_bw_tbl[i]); p_ets->tc_tsa_tbl[i] = cpu_to_be32(p_ets->tc_tsa_tbl[i]); -- cgit v0.10.2 From fb9ea8a9b70c79f38d2758c25d3acff4a2cd5bfb Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Mon, 8 Aug 2016 21:57:41 -0400 Subject: qed: Use ieee mfw-mask to get ethtype in ieee-dcbx mode. Ethtype value is being read incorrectly in ieee-dcbx mode. Use the correct mfw mask value. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 6869330..f07f0ac 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -52,16 +52,33 @@ static bool qed_dcbx_app_ethtype(u32 app_info_bitmap) DCBX_APP_SF_ETHTYPE); } +static bool qed_dcbx_ieee_app_ethtype(u32 app_info_bitmap) +{ + u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE); + + /* Old MFW */ + if (mfw_val == DCBX_APP_SF_IEEE_RESERVED) + return qed_dcbx_app_ethtype(app_info_bitmap); + + return !!(mfw_val == DCBX_APP_SF_IEEE_ETHTYPE); +} + static bool qed_dcbx_app_port(u32 app_info_bitmap) { return !!(QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF) == DCBX_APP_SF_PORT); } -static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id) +static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { - return !!(qed_dcbx_app_ethtype(app_info_bitmap) && - proto_id == QED_ETH_TYPE_DEFAULT); + bool ethtype; + + if (ieee) + ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap); + else + ethtype = qed_dcbx_app_ethtype(app_info_bitmap); + + return !!(ethtype && (proto_id == QED_ETH_TYPE_DEFAULT)); } static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id) @@ -70,16 +87,28 @@ static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id) proto_id == QED_TCP_PORT_ISCSI); } -static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id) +static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { - return !!(qed_dcbx_app_ethtype(app_info_bitmap) && - proto_id == QED_ETH_TYPE_FCOE); + bool ethtype; + + if (ieee) + ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap); + else + ethtype = qed_dcbx_app_ethtype(app_info_bitmap); + + return !!(ethtype && (proto_id == QED_ETH_TYPE_FCOE)); } -static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id) +static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { - return !!(qed_dcbx_app_ethtype(app_info_bitmap) && - proto_id == QED_ETH_TYPE_ROCE); + bool ethtype; + + if (ieee) + ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap); + else + ethtype = qed_dcbx_app_ethtype(app_info_bitmap); + + return !!(ethtype && (proto_id == QED_ETH_TYPE_ROCE)); } static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id) @@ -164,15 +193,15 @@ qed_dcbx_update_app_info(struct qed_dcbx_results *p_data, static bool qed_dcbx_get_app_protocol_type(struct qed_hwfn *p_hwfn, u32 app_prio_bitmap, - u16 id, enum dcbx_protocol_type *type) + u16 id, enum dcbx_protocol_type *type, bool ieee) { - if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id)) { + if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_FCOE; - } else if (qed_dcbx_roce_tlv(app_prio_bitmap, id)) { + } else if (qed_dcbx_roce_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ROCE; } else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id)) { *type = DCBX_PROTOCOL_ISCSI; - } else if (qed_dcbx_default_tlv(app_prio_bitmap, id)) { + } else if (qed_dcbx_default_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ETH; } else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id)) { *type = DCBX_PROTOCOL_ROCE_V2; @@ -194,17 +223,18 @@ static int qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, struct qed_dcbx_results *p_data, struct dcbx_app_priority_entry *p_tbl, - u32 pri_tc_tbl, int count, bool dcbx_enabled) + u32 pri_tc_tbl, int count, u8 dcbx_version) { u8 tc, priority_map; enum dcbx_protocol_type type; + bool enable, ieee; u16 protocol_id; int priority; - bool enable; int i; DP_VERBOSE(p_hwfn, QED_MSG_DCB, "Num APP entries = %d\n", count); + ieee = (dcbx_version == DCBX_CONFIG_VERSION_IEEE); /* Parse APP TLV */ for (i = 0; i < count; i++) { protocol_id = QED_MFW_GET_FIELD(p_tbl[i].entry, @@ -219,7 +249,7 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, tc = QED_DCBX_PRIO2TC(pri_tc_tbl, priority); if (qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry, - protocol_id, &type)) { + protocol_id, &type, ieee)) { /* ETH always have the enable bit reset, as it gets * vlan information per packet. For other protocols, * should be set according to the dcbx_enabled @@ -275,15 +305,12 @@ static int qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn) struct dcbx_ets_feature *p_ets; struct qed_hw_info *p_info; u32 pri_tc_tbl, flags; - bool dcbx_enabled; + u8 dcbx_version; int num_entries; int rc = 0; - /* If DCBx version is non zero, then negotiation was - * successfuly performed - */ flags = p_hwfn->p_dcbx_info->operational.flags; - dcbx_enabled = !!QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION); + dcbx_version = QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION); p_app = &p_hwfn->p_dcbx_info->operational.features.app; p_tbl = p_app->app_pri_tbl; @@ -295,13 +322,13 @@ static int qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn) num_entries = QED_MFW_GET_FIELD(p_app->flags, DCBX_APP_NUM_ENTRIES); rc = qed_dcbx_process_tlv(p_hwfn, &data, p_tbl, pri_tc_tbl, - num_entries, dcbx_enabled); + num_entries, dcbx_version); if (rc) return rc; p_info->num_tc = QED_MFW_GET_FIELD(p_ets->flags, DCBX_ETS_MAX_TCS); data.pf_id = p_hwfn->rel_pf_id; - data.dcbx_enabled = dcbx_enabled; + data.dcbx_enabled = !!dcbx_version; qed_dcbx_dp_protocol(p_hwfn, &data); @@ -400,7 +427,7 @@ static void qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn, struct dcbx_app_priority_feature *p_app, struct dcbx_app_priority_entry *p_tbl, - struct qed_dcbx_params *p_params) + struct qed_dcbx_params *p_params, bool ieee) { struct qed_app_entry *entry; u8 pri_map; @@ -422,7 +449,7 @@ qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn, DCBX_APP_PROTOCOL_ID); qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry, entry->proto_id, - &entry->proto_type); + &entry->proto_type, ieee); } DP_VERBOSE(p_hwfn, QED_MSG_DCB, @@ -500,9 +527,9 @@ qed_dcbx_get_common_params(struct qed_hwfn *p_hwfn, struct dcbx_app_priority_feature *p_app, struct dcbx_app_priority_entry *p_tbl, struct dcbx_ets_feature *p_ets, - u32 pfc, struct qed_dcbx_params *p_params) + u32 pfc, struct qed_dcbx_params *p_params, bool ieee) { - qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params); + qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params, ieee); qed_dcbx_get_ets_data(p_hwfn, p_ets, p_params); qed_dcbx_get_pfc_data(p_hwfn, pfc, p_params); } @@ -516,7 +543,7 @@ qed_dcbx_get_local_params(struct qed_hwfn *p_hwfn, p_feat = &p_hwfn->p_dcbx_info->local_admin.features; qed_dcbx_get_common_params(p_hwfn, &p_feat->app, p_feat->app.app_pri_tbl, &p_feat->ets, - p_feat->pfc, ¶ms->local.params); + p_feat->pfc, ¶ms->local.params, false); params->local.valid = true; } @@ -529,7 +556,7 @@ qed_dcbx_get_remote_params(struct qed_hwfn *p_hwfn, p_feat = &p_hwfn->p_dcbx_info->remote.features; qed_dcbx_get_common_params(p_hwfn, &p_feat->app, p_feat->app.app_pri_tbl, &p_feat->ets, - p_feat->pfc, ¶ms->remote.params); + p_feat->pfc, ¶ms->remote.params, false); params->remote.valid = true; } @@ -574,7 +601,8 @@ qed_dcbx_get_operational_params(struct qed_hwfn *p_hwfn, qed_dcbx_get_common_params(p_hwfn, &p_feat->app, p_feat->app.app_pri_tbl, &p_feat->ets, - p_feat->pfc, ¶ms->operational.params); + p_feat->pfc, ¶ms->operational.params, + p_operational->ieee); qed_dcbx_get_priority_info(p_hwfn, &p_operational->app_prio, p_results); err = QED_MFW_GET_FIELD(p_feat->app.flags, DCBX_APP_ERROR); p_operational->err = err; diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 5927840..6f9d3b8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -6850,6 +6850,14 @@ struct dcbx_app_priority_entry { #define DCBX_APP_SF_SHIFT 8 #define DCBX_APP_SF_ETHTYPE 0 #define DCBX_APP_SF_PORT 1 +#define DCBX_APP_SF_IEEE_MASK 0x0000f000 +#define DCBX_APP_SF_IEEE_SHIFT 12 +#define DCBX_APP_SF_IEEE_RESERVED 0 +#define DCBX_APP_SF_IEEE_ETHTYPE 1 +#define DCBX_APP_SF_IEEE_TCP_PORT 2 +#define DCBX_APP_SF_IEEE_UDP_PORT 3 +#define DCBX_APP_SF_IEEE_TCP_UDP_PORT 4 + #define DCBX_APP_PROTOCOL_ID_MASK 0xffff0000 #define DCBX_APP_PROTOCOL_ID_SHIFT 16 }; -- cgit v0.10.2 From 59bcb7972fc5d53a621ee6b2c3cf1654cebb3dc5 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Mon, 8 Aug 2016 21:57:42 -0400 Subject: qed: Add dcbx app support for IEEE Selection Field. MFW now supports the Selection field for IEEE mode. Add driver changes to use the newer MFW masks to read/write the port-id value. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index f07f0ac..b157a6a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -69,6 +69,17 @@ static bool qed_dcbx_app_port(u32 app_info_bitmap) DCBX_APP_SF_PORT); } +static bool qed_dcbx_ieee_app_port(u32 app_info_bitmap, u8 type) +{ + u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE); + + /* Old MFW */ + if (mfw_val == DCBX_APP_SF_IEEE_RESERVED) + return qed_dcbx_app_port(app_info_bitmap); + + return !!(mfw_val == type || mfw_val == DCBX_APP_SF_IEEE_TCP_UDP_PORT); +} + static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { bool ethtype; @@ -81,10 +92,17 @@ static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) return !!(ethtype && (proto_id == QED_ETH_TYPE_DEFAULT)); } -static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id) +static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { - return !!(qed_dcbx_app_port(app_info_bitmap) && - proto_id == QED_TCP_PORT_ISCSI); + bool port; + + if (ieee) + port = qed_dcbx_ieee_app_port(app_info_bitmap, + DCBX_APP_SF_IEEE_TCP_PORT); + else + port = qed_dcbx_app_port(app_info_bitmap); + + return !!(port && (proto_id == QED_TCP_PORT_ISCSI)); } static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) @@ -111,10 +129,17 @@ static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) return !!(ethtype && (proto_id == QED_ETH_TYPE_ROCE)); } -static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id) +static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { - return !!(qed_dcbx_app_port(app_info_bitmap) && - proto_id == QED_UDP_PORT_TYPE_ROCE_V2); + bool port; + + if (ieee) + port = qed_dcbx_ieee_app_port(app_info_bitmap, + DCBX_APP_SF_IEEE_UDP_PORT); + else + port = qed_dcbx_app_port(app_info_bitmap); + + return !!(port && (proto_id == QED_UDP_PORT_TYPE_ROCE_V2)); } static void @@ -199,11 +224,11 @@ qed_dcbx_get_app_protocol_type(struct qed_hwfn *p_hwfn, *type = DCBX_PROTOCOL_FCOE; } else if (qed_dcbx_roce_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ROCE; - } else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id)) { + } else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ISCSI; } else if (qed_dcbx_default_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ETH; - } else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id)) { + } else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ROCE_V2; } else { *type = DCBX_MAX_PROTOCOL_TYPE; @@ -441,8 +466,39 @@ qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn, DCBX_APP_NUM_ENTRIES); for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) { entry = &p_params->app_entry[i]; - entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry, - DCBX_APP_SF)); + if (ieee) { + u8 sf_ieee; + u32 val; + + sf_ieee = QED_MFW_GET_FIELD(p_tbl[i].entry, + DCBX_APP_SF_IEEE); + switch (sf_ieee) { + case DCBX_APP_SF_IEEE_RESERVED: + /* Old MFW */ + val = QED_MFW_GET_FIELD(p_tbl[i].entry, + DCBX_APP_SF); + entry->sf_ieee = val ? + QED_DCBX_SF_IEEE_TCP_UDP_PORT : + QED_DCBX_SF_IEEE_ETHTYPE; + break; + case DCBX_APP_SF_IEEE_ETHTYPE: + entry->sf_ieee = QED_DCBX_SF_IEEE_ETHTYPE; + break; + case DCBX_APP_SF_IEEE_TCP_PORT: + entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_PORT; + break; + case DCBX_APP_SF_IEEE_UDP_PORT: + entry->sf_ieee = QED_DCBX_SF_IEEE_UDP_PORT; + break; + case DCBX_APP_SF_IEEE_TCP_UDP_PORT: + entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_UDP_PORT; + break; + } + } else { + entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry, + DCBX_APP_SF)); + } + pri_map = QED_MFW_GET_FIELD(p_tbl[i].entry, DCBX_APP_PRI_MAP); entry->prio = ffs(pri_map) - 1; entry->proto_id = QED_MFW_GET_FIELD(p_tbl[i].entry, @@ -981,7 +1037,7 @@ qed_dcbx_set_ets_data(struct qed_hwfn *p_hwfn, static void qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn, struct dcbx_app_priority_feature *p_app, - struct qed_dcbx_params *p_params) + struct qed_dcbx_params *p_params, bool ieee) { u32 *entry; int i; @@ -1002,12 +1058,36 @@ qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn, for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) { entry = &p_app->app_pri_tbl[i].entry; - *entry &= ~DCBX_APP_SF_MASK; - if (p_params->app_entry[i].ethtype) - *entry |= ((u32)DCBX_APP_SF_ETHTYPE << - DCBX_APP_SF_SHIFT); - else - *entry |= ((u32)DCBX_APP_SF_PORT << DCBX_APP_SF_SHIFT); + if (ieee) { + *entry &= ~DCBX_APP_SF_IEEE_MASK; + switch (p_params->app_entry[i].sf_ieee) { + case QED_DCBX_SF_IEEE_ETHTYPE: + *entry |= ((u32)DCBX_APP_SF_IEEE_ETHTYPE << + DCBX_APP_SF_IEEE_SHIFT); + break; + case QED_DCBX_SF_IEEE_TCP_PORT: + *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_PORT << + DCBX_APP_SF_IEEE_SHIFT); + break; + case QED_DCBX_SF_IEEE_UDP_PORT: + *entry |= ((u32)DCBX_APP_SF_IEEE_UDP_PORT << + DCBX_APP_SF_IEEE_SHIFT); + break; + case QED_DCBX_SF_IEEE_TCP_UDP_PORT: + *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_UDP_PORT << + DCBX_APP_SF_IEEE_SHIFT); + break; + } + } else { + *entry &= ~DCBX_APP_SF_MASK; + if (p_params->app_entry[i].ethtype) + *entry |= ((u32)DCBX_APP_SF_ETHTYPE << + DCBX_APP_SF_SHIFT); + else + *entry |= ((u32)DCBX_APP_SF_PORT << + DCBX_APP_SF_SHIFT); + } + *entry &= ~DCBX_APP_PROTOCOL_ID_MASK; *entry |= ((u32)p_params->app_entry[i].proto_id << DCBX_APP_PROTOCOL_ID_SHIFT); @@ -1022,15 +1102,19 @@ qed_dcbx_set_local_params(struct qed_hwfn *p_hwfn, struct dcbx_local_params *local_admin, struct qed_dcbx_set *params) { + bool ieee = false; + local_admin->flags = 0; memcpy(&local_admin->features, &p_hwfn->p_dcbx_info->operational.features, sizeof(local_admin->features)); - if (params->enabled) + if (params->enabled) { local_admin->config = params->ver_num; - else + ieee = !!(params->ver_num & DCBX_CONFIG_VERSION_IEEE); + } else { local_admin->config = DCBX_CONFIG_VERSION_DISABLED; + } if (params->override_flags & QED_DCBX_OVERRIDE_PFC_CFG) qed_dcbx_set_pfc_data(p_hwfn, &local_admin->features.pfc, @@ -1042,7 +1126,7 @@ qed_dcbx_set_local_params(struct qed_hwfn *p_hwfn, if (params->override_flags & QED_DCBX_OVERRIDE_APP_CFG) qed_dcbx_set_app_data(p_hwfn, &local_admin->features.app, - ¶ms->config.params); + ¶ms->config.params, ieee); } int qed_dcbx_config_params(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index b1e3c57..d6c4177 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -70,8 +70,16 @@ struct qed_dbcx_pfc_params { u8 max_tc; }; +enum qed_dcbx_sf_ieee_type { + QED_DCBX_SF_IEEE_ETHTYPE, + QED_DCBX_SF_IEEE_TCP_PORT, + QED_DCBX_SF_IEEE_UDP_PORT, + QED_DCBX_SF_IEEE_TCP_UDP_PORT +}; + struct qed_app_entry { bool ethtype; + enum qed_dcbx_sf_ieee_type sf_ieee; bool enabled; u8 prio; u16 proto_id; -- cgit v0.10.2 From 1d7406ce7bdfc48cd7390f793d23ef81fff75880 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Mon, 8 Aug 2016 21:57:43 -0400 Subject: qed: Update app count when adding a new dcbx app entry to the table. App count is not updated while adding new app entry to the dcbx app table. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index b157a6a..226cb08 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -1707,8 +1707,10 @@ static int qed_dcbnl_setapp(struct qed_dev *cdev, if ((entry->ethtype == ethtype) && (entry->proto_id == idval)) break; /* First empty slot */ - if (!entry->proto_id) + if (!entry->proto_id) { + dcbx_set.config.params.num_app_entries++; break; + } } if (i == QED_DCBX_MAX_APP_PROTOCOL) { @@ -2228,8 +2230,10 @@ int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app) (entry->proto_id == app->protocol)) break; /* First empty slot */ - if (!entry->proto_id) + if (!entry->proto_id) { + dcbx_set.config.params.num_app_entries++; break; + } } if (i == QED_DCBX_MAX_APP_PROTOCOL) { -- cgit v0.10.2 From 6dae61627d1004895bfcee81d24482ec64cbecc5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 13 Jul 2016 08:36:03 +0100 Subject: drm/amdkfd: print doorbell offset as a hex value The doorbell offset is formatted with a 0x prefix to suggest it is a hexadecimal value, when in fact %d is being used and this is confusing. Use %X instead to match the proceeding 0x prefix. Signed-off-by: Colin Ian King Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index e621eba..a7d3cb3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -184,7 +184,7 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, sizeof(u32)) + inx; pr_debug("kfd: get kernel queue doorbell\n" - " doorbell offset == 0x%08d\n" + " doorbell offset == 0x%08X\n" " kernel address == 0x%08lX\n", *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx)); -- cgit v0.10.2 From b173a28f62cf929324a8a6adcc45adadce311d16 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 8 Aug 2016 21:57:58 +0800 Subject: netfilter: nf_ct_expect: remove the redundant slash when policy name is empty The 'name' filed in struct nf_conntrack_expect_policy{} is not a pointer, so check it is NULL or not will always return true. Even if the name is empty, slash will always be displayed like follows: # cat /proc/net/nf_conntrack_expect 297 l3proto = 2 proto=6 src=1.1.1.1 dst=2.2.2.2 sport=1 dport=1025 ftp/ ^ Fixes: 3a8fc53a45c4 ("netfilter: nf_ct_helper: allocate 16 bytes for the helper and policy names") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 9e36931..f8dbacf 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -574,7 +574,7 @@ static int exp_seq_show(struct seq_file *s, void *v) helper = rcu_dereference(nfct_help(expect->master)->helper); if (helper) { seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name); - if (helper->expect_policy[expect->class].name) + if (helper->expect_policy[expect->class].name[0]) seq_printf(s, "/%s", helper->expect_policy[expect->class].name); } -- cgit v0.10.2 From b18bcb0019cf57a3db0917b77e65c29bd9b00f54 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 8 Aug 2016 22:03:40 +0800 Subject: netfilter: nfnetlink_queue: fix memory leak when attach expectation successfully User can use NFQA_EXP to attach expectations to conntracks, but we forget to put back nf_conntrack_expect when it is inserted successfully, i.e. in this normal case, expect's use refcnt will be 3. So even we unlink it and put it back later, the use refcnt is still 1, then the memory will be leaked forever. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 050bb34..b9bfe64 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2362,12 +2362,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, return PTR_ERR(exp); err = nf_ct_expect_related_report(exp, portid, report); - if (err < 0) { - nf_ct_expect_put(exp); - return err; - } - - return 0; + nf_ct_expect_put(exp); + return err; } static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct, -- cgit v0.10.2 From 00a3101f561816e58de054a470484996f78eb5eb Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 8 Aug 2016 22:07:27 +0800 Subject: netfilter: nfnetlink_queue: reject verdict request from different portid Like NFQNL_MSG_VERDICT_BATCH do, we should also reject the verdict request when the portid is not same with the initial portid(maybe from another process). Fixes: 97d32cf9440d ("netfilter: nfnetlink_queue: batch verdict support") Signed-off-by: Liping Zhang Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 5d36a09..f49f450 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1145,10 +1145,8 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, struct nfnl_queue_net *q = nfnl_queue_pernet(net); int err; - queue = instance_lookup(q, queue_num); - if (!queue) - queue = verdict_instance_lookup(q, queue_num, - NETLINK_CB(skb).portid); + queue = verdict_instance_lookup(q, queue_num, + NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); -- cgit v0.10.2 From aa0c2c68abd1e2915656cc81afdb195bc8595dec Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 8 Aug 2016 22:10:26 +0800 Subject: netfilter: ctnetlink: reject new conntrack request with different l4proto Currently, user can add a conntrack with different l4proto via nfnetlink. For example, original tuple is TCP while reply tuple is SCTP. This is invalid combination, we should report EINVAL to userspace. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index b9bfe64..fdfc71f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1894,6 +1894,8 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY]) return -EINVAL; + if (otuple.dst.protonum != rtuple.dst.protonum) + return -EINVAL; ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple, &rtuple, u3); -- cgit v0.10.2 From 55cae7a403f3b52de3dd6e4a614582541c9631af Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Aug 2016 12:13:45 +0200 Subject: rxrpc: fix uninitialized pointer dereference in debug code A newly added bugfix caused an uninitialized variable to be used for printing debug output. This is harmless as long as the debug setting is disabled, but otherwise leads to an immediate crash. gcc warns about this when -Wmaybe-uninitialized is enabled: net/rxrpc/call_object.c: In function 'rxrpc_release_call': net/rxrpc/call_object.c:496:163: error: 'sp' may be used uninitialized in this function [-Werror=maybe-uninitialized] The initialization was removed but one of the users remains. This adds back the initialization. Signed-off-by: Arnd Bergmann Fixes: 372ee16386bb ("rxrpc: Fix races between skb free, ACK generation and replying") Signed-off-by: David Howells diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index c47f14f..e8c953c 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -493,6 +493,7 @@ void rxrpc_release_call(struct rxrpc_call *call) (skb = skb_dequeue(&call->rx_oos_queue))) { spin_unlock_bh(&call->lock); + sp = rxrpc_skb(skb); _debug("- zap %s %%%u #%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial, sp->hdr.seq); -- cgit v0.10.2 From c12abf346456416ca7f7ba45f363cf92d2480a99 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 9 Aug 2016 08:46:15 +1000 Subject: crypto: powerpc - CRYPT_CRC32C_VPMSUM should depend on ALTIVEC The optimised crc32c implementation depends on VMX (aka. Altivec) instructions, so the kernel must be built with Altivec support in order for the crc32c code to build. Fixes: 6dd7a82cc54e ("crypto: powerpc - Add POWER8 optimised crc32c") Acked-by: Anton Blanchard Signed-off-by: Michael Ellerman Signed-off-by: Herbert Xu diff --git a/crypto/Kconfig b/crypto/Kconfig index a9377be..84d7148 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -439,7 +439,7 @@ config CRYPTO_CRC32C_INTEL config CRYPT_CRC32C_VPMSUM tristate "CRC32c CRC algorithm (powerpc64)" - depends on PPC64 + depends on PPC64 && ALTIVEC select CRYPTO_HASH select CRC32 help -- cgit v0.10.2 From a0118c8b2be9297aed8e915c60b4013326b256d4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 9 Aug 2016 08:27:17 +0100 Subject: crypto: caam - fix non-hmac hashes Since 6de62f15b581 ("crypto: algif_hash - Require setkey before accept(2)"), the AF_ALG interface requires userspace to provide a key to any algorithm that has a setkey method. However, the non-HMAC algorithms are not keyed, so setting a key is unnecessary. Fix this by removing the setkey method from the non-keyed hash algorithms. Fixes: 6de62f15b581 ("crypto: algif_hash - Require setkey before accept(2)") Cc: Signed-off-by: Russell King Signed-off-by: Herbert Xu diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index f1ecc8d..36365b3 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -1898,6 +1898,7 @@ caam_hash_alloc(struct caam_hash_template *template, template->name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->driver_name); + t_alg->ahash_alg.setkey = NULL; } alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; -- cgit v0.10.2 From 6bb47e8ab98accb1319bd43c64966340ba3bba9a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 4 Aug 2016 13:32:22 -0700 Subject: usb: hub: Fix unbalanced reference count/memory leak/deadlocks Memory leak and unbalanced reference count: If the hub gets disconnected while the core is still activating it, this can result in leaking memory of few USB structures. This will happen if we have done a kref_get() from hub_activate() and scheduled a delayed work item for HUB_INIT2/3. Now if hub_disconnect() gets called before the delayed work expires, then we will cancel the work from hub_quiesce(), but wouldn't do a kref_put(). And so the unbalance. kmemleak reports this as (with the commit e50293ef9775 backported to 3.10 kernel with other changes, though the same is true for mainline as well): unreferenced object 0xffffffc08af5b800 (size 1024): comm "khubd", pid 73, jiffies 4295051211 (age 6482.350s) hex dump (first 32 bytes): 30 68 f3 8c c0 ff ff ff 00 a0 b2 2e c0 ff ff ff 0h.............. 01 00 00 00 00 00 00 00 00 94 7d 40 c0 ff ff ff ..........}@.... backtrace: [] create_object+0x148/0x2a0 [] kmemleak_alloc+0x80/0xbc [] kmem_cache_alloc_trace+0x120/0x1ac [] hub_probe+0x120/0xb84 [] usb_probe_interface+0x1ec/0x298 [] driver_probe_device+0x160/0x374 [] __device_attach+0x28/0x4c [] bus_for_each_drv+0x78/0xac [] device_attach+0x6c/0x9c [] bus_probe_device+0x28/0xa0 [] device_add+0x324/0x604 [] usb_set_configuration+0x660/0x6cc [] generic_probe+0x44/0x84 [] usb_probe_device+0x54/0x74 [] driver_probe_device+0x160/0x374 [] __device_attach+0x28/0x4c Deadlocks: If the hub gets disconnected early enough (i.e. before INIT2/INIT3 are finished and the init_work is still queued), the core may call hub_quiesce() after acquiring interface device locks and it will wait for the work to be cancelled synchronously. But if the work handler is already running in parallel, it may try to acquire the same interface device lock and this may result in deadlock. Fix both the issues by removing the call to cancel_delayed_work_sync(). CC: #4.4+ Fixes: e50293ef9775 ("USB: fix invalid memory access in hub_activate()") Reported-by: Manu Gautam Signed-off-by: Viresh Kumar Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index bee1351..3ccffac 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1315,8 +1315,6 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type) struct usb_device *hdev = hub->hdev; int i; - cancel_delayed_work_sync(&hub->init_work); - /* hub_wq and related activity won't re-trigger */ hub->quiescing = 1; -- cgit v0.10.2 From ca5cbc8b02f9b21cc8cd1ab36668763ec34f9ee8 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 5 Aug 2016 11:49:45 -0400 Subject: USB: hub: fix up early-exit pathway in hub_activate The early-exit pathway in hub_activate, added by commit e50293ef9775 ("USB: fix invalid memory access in hub_activate()") needs improvement. It duplicates code that is already present at the end of the subroutine, and it neglects to undo the effect of a usb_autopm_get_interface_no_resume() call. This patch fixes both problems by making the early-exit pathway jump directly to the end of the subroutine. It simplifies the code at the end by merging two conditionals that actually test the same condition although they appear different: If type < HUB_INIT3 then type must be either HUB_INIT2 or HUB_INIT, and it can't be HUB_INIT because in that case the subroutine would have exited earlier. Signed-off-by: Alan Stern CC: #4.4+ Reviewed-by: Viresh Kumar Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 3ccffac..bb69d26 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1055,11 +1055,8 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) device_lock(hub->intfdev); /* Was the hub disconnected while we were waiting? */ - if (hub->disconnected) { - device_unlock(hub->intfdev); - kref_put(&hub->kref, hub_release); - return; - } + if (hub->disconnected) + goto disconnected; if (type == HUB_INIT2) goto init2; goto init3; @@ -1281,12 +1278,12 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) /* Scan all ports that need attention */ kick_hub_wq(hub); - /* Allow autosuspend if it was suppressed */ - if (type <= HUB_INIT3) + if (type == HUB_INIT2 || type == HUB_INIT3) { + /* Allow autosuspend if it was suppressed */ + disconnected: usb_autopm_put_interface_async(to_usb_interface(hub->intfdev)); - - if (type == HUB_INIT2 || type == HUB_INIT3) device_unlock(hub->intfdev); + } kref_put(&hub->kref, hub_release); } -- cgit v0.10.2 From 07d316a22e119fa301fd7dba7f1e1adfd4f72c05 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 5 Aug 2016 11:51:30 -0400 Subject: USB: hub: change the locking in hub_activate The locking in hub_activate() is not adequate to provide full mutual exclusion with hub_quiesce(). The subroutine locks the hub's usb_interface, but the callers of hub_quiesce() (such as hub_pre_reset() and hub_event()) hold the lock to the hub's usb_device. This patch changes hub_activate() to make it acquire the same lock as those other routines. Signed-off-by: Alan Stern CC: #4.4+ Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index bb69d26..1d5fc32 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1052,7 +1052,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) /* Continue a partial initialization */ if (type == HUB_INIT2 || type == HUB_INIT3) { - device_lock(hub->intfdev); + device_lock(&hdev->dev); /* Was the hub disconnected while we were waiting? */ if (hub->disconnected) @@ -1259,7 +1259,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) queue_delayed_work(system_power_efficient_wq, &hub->init_work, msecs_to_jiffies(delay)); - device_unlock(hub->intfdev); + device_unlock(&hdev->dev); return; /* Continues at init3: below */ } else { msleep(delay); @@ -1282,7 +1282,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) /* Allow autosuspend if it was suppressed */ disconnected: usb_autopm_put_interface_async(to_usb_interface(hub->intfdev)); - device_unlock(hub->intfdev); + device_unlock(&hdev->dev); } kref_put(&hub->kref, hub_release); -- cgit v0.10.2 From 28324936f3d672bbf83472fece8f36a158a52276 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 26 Jul 2016 16:01:30 +0800 Subject: usb: misc: usbtest: usbtest_do_ioctl may return positive integer For case 14 and case 21, their correct return value is the number of bytes transferred, so it is a positive integer. But in usbtest_ioctl, it takes non-zero as false return value for usbtest_do_ioctl, so it will treat the correct test as wrong test, then the time on tests will be the minus value. Signed-off-by: Peter Chen Cc: stable Fixes: 18fc4ebdc705 ("usb: misc: usbtest: Remove timeval usage") Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 6b978f0..5e3464e 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -2602,7 +2602,7 @@ usbtest_ioctl(struct usb_interface *intf, unsigned int code, void *buf) ktime_get_ts64(&start); retval = usbtest_do_ioctl(intf, param_32); - if (retval) + if (retval < 0) goto free_mutex; ktime_get_ts64(&end); -- cgit v0.10.2 From 9c6256a5e707a9eb8b91962b550050b13aa75334 Mon Sep 17 00:00:00 2001 From: Xiao Han Date: Tue, 14 Jun 2016 16:22:54 +0200 Subject: usb: misc: ftdi-elan: Fix off-by-one memory corruptions This patch fixes fives off-by-one bugs in the ftdi-elan driver code. The bug can be triggered by plugging a USB adapter for CardBus 3G cards (model U132 manufactured by Elan Digital Systems, Ltd), causing a kernel panic. The fix was tested on Ubuntu 14.04.4 with 4.7.0-rc14.2.0-27-generic+ and 4.4.0-22-generic+ kernel. In the ftdi_elan_synchronize function, an off-by-one memory corruption occurs when packet_bytes is equal or bigger than m. After having read m bytes, that is bytes_read is equal to m, " ..\x00" is still copied to the stack variable causing an out bounds write of 4 bytes, which overwrites the stack canary and results in a kernel panic. This off-by-one requires physical access to the machine. It is not exploitable since we have no control on the overwritten data. Similar off-by-one bugs have been observed in 4 other functions: ftdi_elan_stuck_waiting, ftdi_elan_read, ftdi_elan_edset_output and ftdi_elan_flush_input_fifo. Reported-by: Alex Palesandro Signed-off-by: Xiao Han Tested-by: Paul Chaignon Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c index 52c27ca..9b5b3b2 100644 --- a/drivers/usb/misc/ftdi-elan.c +++ b/drivers/usb/misc/ftdi-elan.c @@ -665,7 +665,7 @@ static ssize_t ftdi_elan_read(struct file *file, char __user *buffer, { char data[30 *3 + 4]; char *d = data; - int m = (sizeof(data) - 1) / 3; + int m = (sizeof(data) - 1) / 3 - 1; int bytes_read = 0; int retry_on_empty = 10; int retry_on_timeout = 5; @@ -1684,7 +1684,7 @@ wait:if (ftdi->disconnected > 0) { int i = 0; char data[30 *3 + 4]; char *d = data; - int m = (sizeof(data) - 1) / 3; + int m = (sizeof(data) - 1) / 3 - 1; int l = 0; struct u132_target *target = &ftdi->target[ed]; struct u132_command *command = &ftdi->command[ @@ -1876,7 +1876,7 @@ more:{ if (packet_bytes > 2) { char diag[30 *3 + 4]; char *d = diag; - int m = (sizeof(diag) - 1) / 3; + int m = (sizeof(diag) - 1) / 3 - 1; char *b = ftdi->bulk_in_buffer; int bytes_read = 0; diag[0] = 0; @@ -2053,7 +2053,7 @@ static int ftdi_elan_synchronize(struct usb_ftdi *ftdi) if (packet_bytes > 2) { char diag[30 *3 + 4]; char *d = diag; - int m = (sizeof(diag) - 1) / 3; + int m = (sizeof(diag) - 1) / 3 - 1; char *b = ftdi->bulk_in_buffer; int bytes_read = 0; unsigned char c = 0; @@ -2155,7 +2155,7 @@ more:{ if (packet_bytes > 2) { char diag[30 *3 + 4]; char *d = diag; - int m = (sizeof(diag) - 1) / 3; + int m = (sizeof(diag) - 1) / 3 - 1; char *b = ftdi->bulk_in_buffer; int bytes_read = 0; diag[0] = 0; -- cgit v0.10.2 From aed9d65ac3278d4febd8665bd7db59ef53e825fe Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 1 Aug 2016 15:25:56 -0400 Subject: USB: validate wMaxPacketValue entries in endpoint descriptors Erroneous or malicious endpoint descriptors may have non-zero bits in reserved positions, or out-of-bounds values. This patch helps prevent these from causing problems by bounds-checking the wMaxPacketValue entries in endpoint descriptors and capping the values at the maximum allowed. This issue was first discovered and tests were conducted by Jake Lamberson , an intern working for Rosie Hall. Signed-off-by: Alan Stern Reported-by: roswest Tested-by: roswest CC: Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 31ccdcc..0511631 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -171,6 +171,31 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, ep, buffer, size); } +static const unsigned short low_speed_maxpacket_maxes[4] = { + [USB_ENDPOINT_XFER_CONTROL] = 8, + [USB_ENDPOINT_XFER_ISOC] = 0, + [USB_ENDPOINT_XFER_BULK] = 0, + [USB_ENDPOINT_XFER_INT] = 8, +}; +static const unsigned short full_speed_maxpacket_maxes[4] = { + [USB_ENDPOINT_XFER_CONTROL] = 64, + [USB_ENDPOINT_XFER_ISOC] = 1023, + [USB_ENDPOINT_XFER_BULK] = 64, + [USB_ENDPOINT_XFER_INT] = 64, +}; +static const unsigned short high_speed_maxpacket_maxes[4] = { + [USB_ENDPOINT_XFER_CONTROL] = 64, + [USB_ENDPOINT_XFER_ISOC] = 1024, + [USB_ENDPOINT_XFER_BULK] = 512, + [USB_ENDPOINT_XFER_INT] = 1023, +}; +static const unsigned short super_speed_maxpacket_maxes[4] = { + [USB_ENDPOINT_XFER_CONTROL] = 512, + [USB_ENDPOINT_XFER_ISOC] = 1024, + [USB_ENDPOINT_XFER_BULK] = 1024, + [USB_ENDPOINT_XFER_INT] = 1024, +}; + static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, int asnum, struct usb_host_interface *ifp, int num_ep, unsigned char *buffer, int size) @@ -179,6 +204,8 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, struct usb_endpoint_descriptor *d; struct usb_host_endpoint *endpoint; int n, i, j, retval; + unsigned int maxp; + const unsigned short *maxpacket_maxes; d = (struct usb_endpoint_descriptor *) buffer; buffer += d->bLength; @@ -286,6 +313,42 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, endpoint->desc.wMaxPacketSize = cpu_to_le16(8); } + /* Validate the wMaxPacketSize field */ + maxp = usb_endpoint_maxp(&endpoint->desc); + + /* Find the highest legal maxpacket size for this endpoint */ + i = 0; /* additional transactions per microframe */ + switch (to_usb_device(ddev)->speed) { + case USB_SPEED_LOW: + maxpacket_maxes = low_speed_maxpacket_maxes; + break; + case USB_SPEED_FULL: + maxpacket_maxes = full_speed_maxpacket_maxes; + break; + case USB_SPEED_HIGH: + /* Bits 12..11 are allowed only for HS periodic endpoints */ + if (usb_endpoint_xfer_int(d) || usb_endpoint_xfer_isoc(d)) { + i = maxp & (BIT(12) | BIT(11)); + maxp &= ~i; + } + /* fallthrough */ + default: + maxpacket_maxes = high_speed_maxpacket_maxes; + break; + case USB_SPEED_SUPER: + case USB_SPEED_SUPER_PLUS: + maxpacket_maxes = super_speed_maxpacket_maxes; + break; + } + j = maxpacket_maxes[usb_endpoint_type(&endpoint->desc)]; + + if (maxp > j) { + dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid maxpacket %d, setting to %d\n", + cfgno, inum, asnum, d->bEndpointAddress, maxp, j); + maxp = j; + endpoint->desc.wMaxPacketSize = cpu_to_le16(i | maxp); + } + /* * Some buggy high speed devices have bulk endpoints using * maxpacket sizes other than 512. High speed HCDs may not @@ -293,9 +356,6 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, */ if (to_usb_device(ddev)->speed == USB_SPEED_HIGH && usb_endpoint_xfer_bulk(d)) { - unsigned maxp; - - maxp = usb_endpoint_maxp(&endpoint->desc) & 0x07ff; if (maxp != 512) dev_warn(ddev, "config %d interface %d altsetting %d " "bulk endpoint 0x%X has invalid maxpacket %d\n", -- cgit v0.10.2 From bc337b51508beb2d039aff5074a76cfe1c212030 Mon Sep 17 00:00:00 2001 From: Marc Ohlf Date: Wed, 3 Aug 2016 11:51:54 +0200 Subject: usb: ehci: change order of register cleanup during shutdown In ehci_turn_off_all_ports() all EHCI port registers are cleared to zero. On some hardware, this can lead to an system hang, when ehci_port_power() accesses the already cleared registers. This patch changes the order of cleanup. First call ehci_port_power() which respects the current bits in port status registers and afterwards cleanup the hard way by setting everything to zero. Signed-off-by: Marc Ohlf Acked-by: Alan Stern CC: Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index a962b89..1e5f529 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -332,11 +332,11 @@ static void ehci_turn_off_all_ports(struct ehci_hcd *ehci) int port = HCS_N_PORTS(ehci->hcs_params); while (port--) { - ehci_writel(ehci, PORT_RWC_BITS, - &ehci->regs->port_status[port]); spin_unlock_irq(&ehci->lock); ehci_port_power(ehci, port, false); spin_lock_irq(&ehci->lock); + ehci_writel(ehci, PORT_RWC_BITS, + &ehci->regs->port_status[port]); } } -- cgit v0.10.2 From 70f7ca9a0262784d0b80727860a63d64ab228e7b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 15 Jun 2016 15:56:11 +0200 Subject: usb: devio, do not warn when allocation fails usbdev_mmap allocates a buffer. The size of the buffer is determined by a user. So with this code (no need to be root): int fd = open("/dev/bus/usb/001/001", O_RDONLY); mmap(NULL, 0x800000, PROT_READ, MAP_SHARED, fd, 0); we can see a warning: WARNING: CPU: 0 PID: 21771 at ../mm/page_alloc.c:3563 __alloc_pages_slowpath+0x1036/0x16e0() ... Call Trace: [] ? warn_slowpath_null+0x2e/0x40 [] ? __alloc_pages_slowpath+0x1036/0x16e0 [] ? warn_alloc_failed+0x250/0x250 [] ? get_page_from_freelist+0x75b/0x28b0 [] ? __alloc_pages_nodemask+0x583/0x6b0 [] ? __alloc_pages_slowpath+0x16e0/0x16e0 [] ? dma_generic_alloc_coherent+0x104/0x220 [] ? hcd_buffer_alloc+0x1d6/0x3e0 [usbcore] [] ? hcd_buffer_destroy+0xa0/0xa0 [usbcore] [] ? usb_alloc_coherent+0x65/0x90 [usbcore] [] ? usbdev_mmap+0x1a5/0x770 [usbcore] ... Allocations like this one should be marked as __GFP_NOWARN. So do so. The size could be also clipped by something like: if (size >= (1 << (MAX_ORDER + PAGE_SHIFT - 1))) return -ENOMEM; But I think the overall limit of 16M (by usbfs_increase_memory_usage) is enough, so that we only silence the warning here. Signed-off-by: Jiri Slaby Cc: Greg Kroah-Hartman Cc: Alan Stern Cc: Steinar H. Gunderson Cc: Markus Rechberger Fixes: f7d34b445a (USB: Add support for usbfs zerocopy.) Cc: 4.6+ Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index e9f5043..472cbcd 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -241,7 +241,8 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma) goto error_decrease_mem; } - mem = usb_alloc_coherent(ps->dev, size, GFP_USER, &dma_handle); + mem = usb_alloc_coherent(ps->dev, size, GFP_USER | __GFP_NOWARN, + &dma_handle); if (!mem) { ret = -ENOMEM; goto error_free_usbm; -- cgit v0.10.2 From 5cce438298a0d2a7a857a4a3c3e26aeb8f77b941 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 10 Jun 2016 14:42:55 -0400 Subject: USB: remove race condition in usbfs/libusb when using reap-after-disconnect Hans de Goede has reported a difficulty in the Linux port of libusb. When a device is removed, the poll() system call in usbfs starts returning POLLERR as soon as udev->state is set to USB_STATE_NOTATTACHED, but the outstanding URBs are not available for reaping until some time later (after usbdev_remove() has been called). This is awkward for libusb or other usbfs clients, although not an insuperable problem. At any rate, it's easy to change usbfs so that it returns POLLHUP as soon as the state becomes USB_STATE_NOTATTACHED but it doesn't return POLLERR until after the outstanding URBs have completed. That's what this patch does; it uses the fact that ps->list is always on the dev->filelist list until usbdev_remove() takes it off, which happens after all the outstanding URBs have been cancelled. Signed-off-by: Alan Stern Reported-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 472cbcd..e6a6d67 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -2583,7 +2583,9 @@ static unsigned int usbdev_poll(struct file *file, if (file->f_mode & FMODE_WRITE && !list_empty(&ps->async_completed)) mask |= POLLOUT | POLLWRNORM; if (!connected(ps)) - mask |= POLLERR | POLLHUP; + mask |= POLLHUP; + if (list_empty(&ps->list)) + mask |= POLLERR; return mask; } -- cgit v0.10.2 From 59b71f774fc2ec2d985251e72fde0f9f88164547 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Thu, 21 Jul 2016 22:20:53 +0900 Subject: usb: host: max3421-hcd: fix mask of IO control register GPIO control register is divided into IOPINS1 and IOPINS2. And low 4-bit of register is controls output. So, this patch fixes wrong mask of GPIO output. Signed-off-by: Jaewon Kim Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c index c369c29..2f76900 100644 --- a/drivers/usb/host/max3421-hcd.c +++ b/drivers/usb/host/max3421-hcd.c @@ -1675,7 +1675,7 @@ max3421_gpout_set_value(struct usb_hcd *hcd, u8 pin_number, u8 value) if (pin_number > 7) return; - mask = 1u << pin_number; + mask = 1u << (pin_number % 4); idx = pin_number / 4; if (value) -- cgit v0.10.2 From fd837b08d98c0c9f4f31998f2ed55b9d8694082c Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 8 Aug 2016 17:29:28 +0100 Subject: KVM: arm64: ITS: return 1 on successful MSI injection According to the KVM API documentation a successful MSI injection should return a value > 0 on success. Return possible errors in vgic_its_trigger_msi() and report a successful injection back to userland, while also reporting the case where the MSI could not be delivered due to the guest not having the LPI mapped, for instance. Signed-off-by: Andre Przywara Reviewed-by: Eric Auger Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 56b0b7e..99ac022 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -337,6 +337,7 @@ */ #define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107 #define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109 +#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307 #define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507 #define E_ITS_MAPD_DEVICE_OOR 0x010801 #define E_ITS_MAPC_PROCNUM_OOR 0x010902 diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 07411cf..1bd8adb 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -441,39 +441,48 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm, * Find the target VCPU and the LPI number for a given devid/eventid pair * and make this IRQ pending, possibly injecting it. * Must be called with the its_lock mutex held. + * Returns 0 on success, a positive error value for any ITS mapping + * related errors and negative error values for generic errors. */ -static void vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, - u32 devid, u32 eventid) +static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid) { + struct kvm_vcpu *vcpu; struct its_itte *itte; if (!its->enabled) - return; + return -EBUSY; itte = find_itte(its, devid, eventid); - /* Triggering an unmapped IRQ gets silently dropped. */ - if (itte && its_is_collection_mapped(itte->collection)) { - struct kvm_vcpu *vcpu; - - vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); - if (vcpu && vcpu->arch.vgic_cpu.lpis_enabled) { - spin_lock(&itte->irq->irq_lock); - itte->irq->pending = true; - vgic_queue_irq_unlock(kvm, itte->irq); - } - } + if (!itte || !its_is_collection_mapped(itte->collection)) + return E_ITS_INT_UNMAPPED_INTERRUPT; + + vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); + if (!vcpu) + return E_ITS_INT_UNMAPPED_INTERRUPT; + + if (!vcpu->arch.vgic_cpu.lpis_enabled) + return -EBUSY; + + spin_lock(&itte->irq->irq_lock); + itte->irq->pending = true; + vgic_queue_irq_unlock(kvm, itte->irq); + + return 0; } /* * Queries the KVM IO bus framework to get the ITS pointer from the given * doorbell address. * We then call vgic_its_trigger_msi() with the decoded data. + * According to the KVM_SIGNAL_MSI API description returns 1 on success. */ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) { u64 address; struct kvm_io_device *kvm_io_dev; struct vgic_io_device *iodev; + int ret; if (!vgic_has_its(kvm)) return -ENODEV; @@ -490,10 +499,21 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) iodev = container_of(kvm_io_dev, struct vgic_io_device, dev); mutex_lock(&iodev->its->its_lock); - vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data); + ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data); mutex_unlock(&iodev->its->its_lock); - return 0; + if (ret < 0) + return ret; + + /* + * KVM_SIGNAL_MSI demands a return value > 0 for success and 0 + * if the guest has blocked the MSI. So we map any LPI mapping + * related error to that. + */ + if (ret) + return 0; + else + return 1; } /* Requires the its_lock to be held. */ @@ -981,9 +1001,7 @@ static int vgic_its_cmd_handle_int(struct kvm *kvm, struct vgic_its *its, u32 msi_data = its_cmd_get_id(its_cmd); u64 msi_devid = its_cmd_get_deviceid(its_cmd); - vgic_its_trigger_msi(kvm, its, msi_devid, msi_data); - - return 0; + return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data); } /* -- cgit v0.10.2 From 17b963e319449f709e78dc1ef445d797a13eecbc Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 8 Aug 2016 13:06:41 +0100 Subject: rxrpc: Need to flag call as being released on connect failure If rxrpc_new_client_call() fails to make a connection, the call record that it allocated needs to be marked as RXRPC_CALL_RELEASED before it is passed to rxrpc_put_call() to indicate that it no longer has any attachment to the AF_RXRPC socket. Without this, an assertion failure may occur at: net/rxrpc/call_object:635 Signed-off-by: David Howells diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index e8c953c..ae057e0 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -275,6 +275,7 @@ error: list_del_init(&call->link); write_unlock_bh(&rxrpc_call_lock); + set_bit(RXRPC_CALL_RELEASED, &call->flags); call->state = RXRPC_CALL_DEAD; rxrpc_put_call(call); _leave(" = %d", ret); @@ -287,6 +288,7 @@ error: */ found_user_ID_now_present: write_unlock(&rx->call_lock); + set_bit(RXRPC_CALL_RELEASED, &call->flags); call->state = RXRPC_CALL_DEAD; rxrpc_put_call(call); _leave(" = -EEXIST [%p]", call); -- cgit v0.10.2 From f9dc575725baeaad8eb5262589f9aebeeaf13433 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 8 Aug 2016 10:27:26 +0100 Subject: rxrpc: Don't access connection from call if pointer is NULL The call state machine processor sets up the message parameters for a UDP message that it might need to transmit in advance on the basis that there's a very good chance it's going to have to transmit either an ACK or an ABORT. This requires it to look in the connection struct to retrieve some of the parameters. However, if the call is complete, the call connection pointer may be NULL to dissuade the processor from transmitting a message. However, there are some situations where the processor is still going to be called - and it's still going to set up message parameters whether it needs them or not. This results in a NULL pointer dereference at: net/rxrpc/call_event.c:837 To fix this, skip the message pre-initialisation if there's no connection attached. Signed-off-by: David Howells diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index f5e9916..e60cf65 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -837,6 +837,9 @@ void rxrpc_process_call(struct work_struct *work) return; } + if (!call->conn) + goto skip_msg_init; + /* there's a good chance we're going to have to send a message, so set * one up in advance */ msg.msg_name = &call->conn->params.peer->srx.transport; @@ -859,6 +862,7 @@ void rxrpc_process_call(struct work_struct *work) memset(iov, 0, sizeof(iov)); iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); +skip_msg_init: /* deal with events of a final nature */ if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) { -- cgit v0.10.2 From 2e7e9758b2b680fa615d5cf70b7af416e96162d2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Aug 2016 10:11:48 +0100 Subject: rxrpc: Once packet posted in data_ready, don't retry posting Once a packet has been posted to a connection in the data_ready handler, we mustn't try reposting if we then find that the connection is dying as the refcount has been given over to the dying connection and the packet might no longer exist. Losing the packet isn't a problem as the peer will retransmit. Signed-off-by: David Howells diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9e0f58e..04afdc0 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -567,13 +567,13 @@ done: * post connection-level events to the connection * - this includes challenges, responses and some aborts */ -static bool rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, +static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, struct sk_buff *skb) { _enter("%p,%p", conn, skb); skb_queue_tail(&conn->rx_queue, skb); - return rxrpc_queue_conn(conn); + rxrpc_queue_conn(conn); } /* @@ -694,7 +694,6 @@ void rxrpc_data_ready(struct sock *sk) rcu_read_lock(); -retry_find_conn: conn = rxrpc_find_connection_rcu(local, skb); if (!conn) goto cant_route_call; @@ -702,8 +701,7 @@ retry_find_conn: if (sp->hdr.callNumber == 0) { /* Connection-level packet */ _debug("CONN %p {%d}", conn, conn->debug_id); - if (!rxrpc_post_packet_to_conn(conn, skb)) - goto retry_find_conn; + rxrpc_post_packet_to_conn(conn, skb); } else { /* Call-bound packets are routed by connection channel. */ unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK; -- cgit v0.10.2 From 50fd85a1f94753b86a7f540794dfd4f799e81ac2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Aug 2016 11:30:43 +0100 Subject: rxrpc: Fix a use-after-push in data_ready handler Fix a use of a packet after it has been enqueued onto the packet processing queue in the data_ready handler. Once on a call's Rx queue, we mustn't touch it any more as it may be dequeued and freed by the call processor running on a work queue. Save the values we need before enqueuing. Without this, we can get an oops like the following: BUG: unable to handle kernel NULL pointer dereference at 000000000000009c IP: [] rxrpc_fast_process_packet+0x724/0xa11 [af_rxrpc] PGD 0 Oops: 0000 [#1] SMP Modules linked in: kafs(E) af_rxrpc(E) [last unloaded: af_rxrpc] CPU: 2 PID: 0 Comm: swapper/2 Tainted: G E 4.7.0-fsdevel+ #1336 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 task: ffff88040d6863c0 task.stack: ffff88040d68c000 RIP: 0010:[] [] rxrpc_fast_process_packet+0x724/0xa11 [af_rxrpc] RSP: 0018:ffff88041fb03a78 EFLAGS: 00010246 RAX: ffffffffffffffff RBX: ffff8803ff195b00 RCX: 0000000000000001 RDX: ffffffffa01854d1 RSI: 0000000000000008 RDI: ffff8803ff195b00 RBP: ffff88041fb03ab0 R08: 0000000000000000 R09: 0000000000000001 R10: ffff88041fb038c8 R11: 0000000000000000 R12: ffff880406874800 R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88041fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000009c CR3: 0000000001c14000 CR4: 00000000001406e0 Stack: ffff8803ff195ea0 ffff880408348800 ffff880406874800 ffff8803ff195b00 ffff880408348800 ffff8803ff195ed8 0000000000000000 ffff88041fb03af0 ffffffffa0186072 0000000000000000 ffff8804054da000 0000000000000000 Call Trace: [] rxrpc_data_ready+0x89d/0xbae [af_rxrpc] [] __sock_queue_rcv_skb+0x24c/0x2b2 [] __udp_queue_rcv_skb+0x4b/0x1bd [] udp_queue_rcv_skb+0x281/0x4db [] __udp4_lib_rcv+0x7ed/0x963 [] udp_rcv+0x15/0x17 [] ip_local_deliver_finish+0x1c3/0x318 [] ip_local_deliver+0xbb/0xc4 [] ? inet_del_offload+0x40/0x40 [] ip_rcv_finish+0x3ce/0x42c [] ip_rcv+0x304/0x33d [] ? ip_local_deliver_finish+0x318/0x318 [] __netif_receive_skb_core+0x601/0x6e8 [] __netif_receive_skb+0x13/0x54 [] netif_receive_skb_internal+0xbb/0x17c [] napi_gro_receive+0xf9/0x1bd [] rtl8169_poll+0x32b/0x4a8 [] net_rx_action+0xe8/0x357 [] __do_softirq+0x1aa/0x414 [] irq_exit+0x3d/0xb0 [] do_IRQ+0xe4/0xfc [] common_interrupt+0x93/0x93 [] ? cpuidle_enter_state+0x1ad/0x2be [] ? cpuidle_enter_state+0x1a8/0x2be [] cpuidle_enter+0x12/0x14 [] call_cpuidle+0x39/0x3b [] cpu_startup_entry+0x230/0x35d [] start_secondary+0xf4/0xf7 Signed-off-by: David Howells diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 04afdc0..7897190 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -124,11 +124,15 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, struct rxrpc_skb_priv *sp; bool terminal; int ret, ackbit, ack; + u32 serial; + u8 flags; _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq); sp = rxrpc_skb(skb); ASSERTCMP(sp->call, ==, NULL); + flags = sp->hdr.flags; + serial = sp->hdr.serial; spin_lock(&call->lock); @@ -192,8 +196,8 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, sp->call = call; rxrpc_get_call(call); atomic_inc(&call->skb_count); - terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) && - !(sp->hdr.flags & RXRPC_CLIENT_INITIATED)); + terminal = ((flags & RXRPC_LAST_PACKET) && + !(flags & RXRPC_CLIENT_INITIATED)); ret = rxrpc_queue_rcv_skb(call, skb, false, terminal); if (ret < 0) { if (ret == -ENOMEM || ret == -ENOBUFS) { @@ -205,12 +209,13 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, } skb = NULL; + sp = NULL; _debug("post #%u", seq); ASSERTCMP(call->rx_data_post, ==, seq); call->rx_data_post++; - if (sp->hdr.flags & RXRPC_LAST_PACKET) + if (flags & RXRPC_LAST_PACKET) set_bit(RXRPC_CALL_RCVD_LAST, &call->flags); /* if we've reached an out of sequence packet then we need to drain @@ -226,7 +231,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, spin_unlock(&call->lock); atomic_inc(&call->ackr_not_idle); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, sp->hdr.serial, false); + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false); _leave(" = 0 [posted]"); return 0; @@ -239,7 +244,7 @@ out: discard_and_ack: _debug("discard and ACK packet %p", skb); - __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true); + __rxrpc_propose_ACK(call, ack, serial, true); discard: spin_unlock(&call->lock); rxrpc_free_skb(skb); @@ -247,7 +252,7 @@ discard: return 0; enqueue_and_ack: - __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true); + __rxrpc_propose_ACK(call, ack, serial, true); enqueue_packet: _net("defer skb %p", skb); spin_unlock(&call->lock); -- cgit v0.10.2 From 992c273af9d9f12a2e470731f69bb3baa694562b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Aug 2016 16:58:42 +0100 Subject: rxrpc: Free packets discarded in data_ready Under certain conditions, the data_ready handler will discard a packet. These need to be freed. Signed-off-by: David Howells diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 7897190..70bb778 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -744,6 +744,8 @@ cant_route_call: if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { _debug("reject type %d",sp->hdr.type); rxrpc_reject_packet(local, skb); + } else { + rxrpc_free_skb(skb); } _leave(" [no call]"); return; -- cgit v0.10.2 From 8b078c603249239f597dc395ac182667c8e0af9c Mon Sep 17 00:00:00 2001 From: Mathias Koehrer Date: Tue, 9 Aug 2016 10:33:31 +0200 Subject: PCI: Update "pci=resource_alignment" documentation Some uio based PCI drivers, e.g., uio_cif, do not work if the assigned PCI memory resources are not page aligned. By using the kernel option "pci=resource_alignment=@:." it is possible to request page alignment for memory resources of devices. However, this is cumbersome when using several devices, and the bus/slot/func addresses may change if devices are added to or removed from the system. Extend the "pci=resource_alignment" option so we can specify the relevant devices via PCI vendor, device, subvendor, and subdevice IDs. The specification of the devices via IDs is indicated by a leading string "pci:" as argument to "pci=resource_alignment". The format of the specification is pci::[::] Examples: pci=resource_alignment=4096@pci:8086:9c22:103c:198f pci=resource_alignment=pci:8086:9c22 # defaults to PAGE_SIZE align [bhelgaas: changelog, use actual vendor/device IDs in examples] Signed-off-by: Mathias Koehrer Signed-off-by: Bjorn Helgaas diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 46c030a..a4f4d69 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3032,6 +3032,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. PAGE_SIZE is used as alignment. PCI-PCI bridge can be specified, if resource windows need to be expanded. + To specify the alignment for several + instances of a device, the PCI vendor, + device, subvendor, and subdevice may be + specified, e.g., 4096@pci:8086:9c22:103c:198f ecrc= Enable/disable PCIe ECRC (transaction layer end-to-end CRC checking). bios: Use BIOS/firmware settings. This is the -- cgit v0.10.2 From beab47d55b883e85bec7771a899ac791c6f92c80 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 19 Jul 2016 22:42:22 +0200 Subject: of: Delete an unnecessary check before the function call "of_node_put" The of_node_put() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Rob Herring diff --git a/drivers/of/base.c b/drivers/of/base.c index 7792266..cb255a0 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1631,8 +1631,7 @@ static int __of_parse_phandle_with_args(const struct device_node *np, */ err: - if (it.node) - of_node_put(it.node); + of_node_put(it.node); return rc; } -- cgit v0.10.2 From 89c67752ae4dedc1244344b266c837ddb4053ebd Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 1 Aug 2016 17:17:53 +1000 Subject: drivers/of: Validate device node in __unflatten_device_tree() @mynodes is set to NULL when __unflatten_device_tree() is called to unflatten device sub-tree in PCI hot add scenario on PowerPC PowerNV platform. Marking @mynodes detached unconditionally causes kernel crash as below backtrace shows: Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc000000000b26f64 cpu 0x0: Vector: 300 (Data Access) at [c000003fcc7cf740] pc: c000000000b26f64: __unflatten_device_tree+0xf4/0x190 lr: c000000000b26f40: __unflatten_device_tree+0xd0/0x190 sp: c000003fcc7cf9c0 msr: 900000000280b033 dar: 0 dsisr: 40000000 current = 0xc000003fcc281680 paca = 0xc00000000ff00000 softe: 0 irq_happened: 0x01 pid = 2724, comm = sh Linux version 4.7.0-gavin-07754-g92a6836 (gwshan@gwshan) (gcc version \ 4.9.3 (Buildroot 2016.02-rc2-00093-g5ea3bce) ) #539 SMP Mon Aug 1 \ 12:40:29 AEST 2016 enter ? for help [c000003fcc7cfa50] c000000000b27060 of_fdt_unflatten_tree+0x60/0x90 [c000003fcc7cfaa0] c0000000004c6288 pnv_php_set_slot_power_state+0x118/0x440 [c000003fcc7cfb80] c0000000004c6a10 pnv_php_enable+0xc0/0x170 [c000003fcc7cfbd0] c0000000004c4d80 power_write_file+0xa0/0x190 [c000003fcc7cfc50] c0000000004be93c pci_slot_attr_store+0x3c/0x60 [c000003fcc7cfc70] c0000000002d3fd4 sysfs_kf_write+0x94/0xc0 [c000003fcc7cfcb0] c0000000002d2c30 kernfs_fop_write+0x180/0x260 [c000003fcc7cfd00] c000000000230fe0 __vfs_write+0x40/0x190 [c000003fcc7cfd90] c000000000232278 vfs_write+0xc8/0x240 [c000003fcc7cfde0] c000000000233d90 SyS_write+0x60/0x110 [c000003fcc7cfe30] c000000000009524 system_call+0x38/0x108 This avoids the kernel crash by marking @mynodes detached only when @mynodes is dereferencing valid device node in __unflatten_device_tree(). Fixes: 1d1bde550ea3 ("of: fdt: mark unflattened tree as detached") Reported-by: Meng Li Signed-off-by: Gavin Shan Signed-off-by: Rob Herring diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 55f1b83..085c638 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -517,7 +517,7 @@ static void *__unflatten_device_tree(const void *blob, pr_warning("End of tree marker overwritten: %08x\n", be32_to_cpup(mem + size)); - if (detached) { + if (detached && mynodes) { of_node_set_flag(*mynodes, OF_DETACHED); pr_debug("unflattened tree is detached\n"); } -- cgit v0.10.2 From e55aeb6ba4e8cc3549bff1e75ea1d029324bce21 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 9 Aug 2016 16:18:51 +0200 Subject: of/irq: Mark interrupt controllers as populated before initialisation That way the init callback may clear the flag again, in case of drivers split between early irq chip and a normal platform driver. Fixes: 15cc2ed6dcf9 ("of/irq: Mark initialised interrupt controllers as populated") Suggested-by: Rob Herring Signed-off-by: Philipp Zabel Acked-by: Jon Hunter Signed-off-by: Rob Herring diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 89a71c6..a2e68f7 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -544,12 +544,15 @@ void __init of_irq_init(const struct of_device_id *matches) list_del(&desc->list); + of_node_set_flag(desc->dev, OF_POPULATED); + pr_debug("of_irq_init: init %s (%p), parent %p\n", desc->dev->full_name, desc->dev, desc->interrupt_parent); ret = desc->irq_init_cb(desc->dev, desc->interrupt_parent); if (ret) { + of_node_clear_flag(desc->dev, OF_POPULATED); kfree(desc); continue; } @@ -559,8 +562,6 @@ void __init of_irq_init(const struct of_device_id *matches) * its children can get processed in a subsequent pass. */ list_add_tail(&desc->list, &intc_parent_list); - - of_node_set_flag(desc->dev, OF_POPULATED); } /* Get the next pending parent that might have children */ -- cgit v0.10.2 From 255c0397bc402e3fcc8833c214f49b2108f04d1a Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 9 Aug 2016 16:18:52 +0200 Subject: ARM: imx6: mark GPC node as not populated after irq init to probe pm domain driver Since IRQCHIP_DECLARE now flags the GPC node as already populated, the GPC power domain driver is never probed unless we clear the flag again. Fixes: 15cc2ed6dcf9 ("of/irq: Mark initialised interrupt controllers as populated") Suggested-by: Rob Herring Signed-off-by: Philipp Zabel Cc: Jon Hunter Signed-off-by: Rob Herring diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index fd87205..0df062d 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -271,6 +271,12 @@ static int __init imx_gpc_init(struct device_node *node, for (i = 0; i < IMR_NUM; i++) writel_relaxed(~0, gpc_base + GPC_IMR1 + i * 4); + /* + * Clear the OF_POPULATED flag set in of_irq_init so that + * later the GPC power domain driver will not be skipped. + */ + of_node_clear_flag(node, OF_POPULATED); + return 0; } IRQCHIP_DECLARE(imx_gpc, "fsl,imx6q-gpc", imx_gpc_init); -- cgit v0.10.2 From a5d0dc810abf3d6b241777467ee1d6efb02575fc Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Tue, 9 Aug 2016 15:29:42 -0400 Subject: vti: flush x-netns xfrm cache when vti interface is removed When executing the script included below, the netns delete operation hangs with the following message (repeated at 10 second intervals): kernel:unregister_netdevice: waiting for lo to become free. Usage count = 1 This occurs because a reference to the lo interface in the "secure" netns is still held by a dst entry in the xfrm bundle cache in the init netns. Address this problem by garbage collecting the tunnel netns flow cache when a cross-namespace vti interface receives a NETDEV_DOWN notification. A more detailed description of the problem scenario (referencing commands in the script below): (1) ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1 The vti_test interface is created in the init namespace. vti_tunnel_init() attaches a struct ip_tunnel to the vti interface's netdev_priv(dev), setting the tunnel net to &init_net. (2) ip link set vti_test netns secure The vti_test interface is moved to the "secure" netns. Note that the associated struct ip_tunnel still has tunnel->net set to &init_net. (3) ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1 The first packet sent using the vti device causes xfrm_lookup() to be called as follows: dst = xfrm_lookup(tunnel->net, skb_dst(skb), fl, NULL, 0); Note that tunnel->net is the init namespace, while skb_dst(skb) references the vti_test interface in the "secure" namespace. The returned dst references an interface in the init namespace. Also note that the first parameter to xfrm_lookup() determines which flow cache is used to store the computed xfrm bundle, so after xfrm_lookup() returns there will be a cached bundle in the init namespace flow cache with a dst referencing a device in the "secure" namespace. (4) ip netns del secure Kernel begins to delete the "secure" namespace. At some point the vti_test interface is deleted, at which point dst_ifdown() changes the dst->dev in the cached xfrm bundle flow from vti_test to lo (still in the "secure" namespace however). Since nothing has happened to cause the init namespace's flow cache to be garbage collected, this dst remains attached to the flow cache, so the kernel loops waiting for the last reference to lo to go away. ip link add br1 type bridge ip link set dev br1 up ip addr add dev br1 1.1.1.1/8 ip netns add secure ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1 ip link set vti_test netns secure ip netns exec secure ip link set vti_test up ip netns exec secure ip link s lo up ip netns exec secure ip addr add dev lo 192.168.100.1/24 ip netns exec secure ip route add 192.168.200.0/24 dev vti_test ip xfrm policy flush ip xfrm state flush ip xfrm policy add dir out tmpl src 1.1.1.1 dst 1.1.1.2 \ proto esp mode tunnel mark 1 ip xfrm policy add dir in tmpl src 1.1.1.2 dst 1.1.1.1 \ proto esp mode tunnel mark 1 ip xfrm state add src 1.1.1.1 dst 1.1.1.2 proto esp spi 1 \ mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788 ip xfrm state add src 1.1.1.2 dst 1.1.1.1 proto esp spi 1 \ mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788 ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1 ip netns del secure Reported-by: Hangbin Liu Reported-by: Jan Tluka Signed-off-by: Lance Richardson Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index a917903..cc701fa 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -557,6 +557,33 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { .get_link_net = ip_tunnel_get_link_net, }; +static bool is_vti_tunnel(const struct net_device *dev) +{ + return dev->netdev_ops == &vti_netdev_ops; +} + +static int vti_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct ip_tunnel *tunnel = netdev_priv(dev); + + if (!is_vti_tunnel(dev)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_DOWN: + if (!net_eq(tunnel->net, dev_net(dev))) + xfrm_garbage_collect(tunnel->net); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block vti_notifier_block __read_mostly = { + .notifier_call = vti_device_event, +}; + static int __init vti_init(void) { const char *msg; @@ -564,6 +591,8 @@ static int __init vti_init(void) pr_info("IPv4 over IPsec tunneling driver\n"); + register_netdevice_notifier(&vti_notifier_block); + msg = "tunnel device"; err = register_pernet_device(&vti_net_ops); if (err < 0) @@ -596,6 +625,7 @@ xfrm_proto_ah_failed: xfrm_proto_esp_failed: unregister_pernet_device(&vti_net_ops); pernet_dev_failed: + unregister_netdevice_notifier(&vti_notifier_block); pr_err("vti init: failed to register %s\n", msg); return err; } @@ -607,6 +637,7 @@ static void __exit vti_fini(void) xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); unregister_pernet_device(&vti_net_ops); + unregister_netdevice_notifier(&vti_notifier_block); } module_init(vti_init); -- cgit v0.10.2 From 64827adc9e14c8d17cf5f3d5a9ee47a42e95dd8c Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 28 Jul 2016 17:20:32 +0800 Subject: drm/amdgpu: fix vm init error path Signed-off-by: Chunming Zhou Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8e642fc..80120fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1535,7 +1535,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) r = amd_sched_entity_init(&ring->sched, &vm->entity, rq, amdgpu_sched_jobs); if (r) - return r; + goto err; vm->page_directory_fence = NULL; @@ -1565,6 +1565,9 @@ error_free_page_directory: error_free_sched_entity: amd_sched_entity_fini(&ring->sched, &vm->entity); +err: + drm_free_large(vm->page_tables); + return r; } -- cgit v0.10.2 From 254a49d5139a70828d652ef4faec40763993e403 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Tue, 9 Aug 2016 15:09:44 +0300 Subject: drivers: net: cpsw: fix kmemleak false-positive reports for sk buffers Kmemleak reports following false positive memory leaks for each sk buffers allocated by CPSW (__netdev_alloc_skb_ip_align()) in cpsw_ndo_open() and cpsw_rx_handler(): unreferenced object 0xea915000 (size 2048): comm "systemd-network", pid 713, jiffies 4294938323 (age 102.180s) hex dump (first 32 bytes): 00 58 91 ea ff ff ff ff ff ff ff ff ff ff ff ff .X.............. ff ff ff ff ff ff fd 0f 00 00 00 00 00 00 00 00 ................ backtrace: [] __kmalloc_track_caller+0x1a4/0x230 [] __alloc_skb+0x68/0x16c [] __netdev_alloc_skb+0x40/0x104 [] cpsw_ndo_open+0x374/0x670 [ti_cpsw] [] __dev_open+0xb0/0x114 [] __dev_change_flags+0x9c/0x14c [] dev_change_flags+0x20/0x50 [] do_setlink+0x2cc/0x78c [] rtnl_setlink+0xcc/0x100 [] rtnetlink_rcv_msg+0x184/0x224 [] netlink_rcv_skb+0xa8/0xc4 [] rtnetlink_rcv+0x2c/0x34 [] netlink_unicast+0x16c/0x1f8 [] netlink_sendmsg+0x334/0x348 [] sock_sendmsg+0x1c/0x2c [] SyS_sendto+0xc0/0xe8 unreferenced object 0xec861780 (size 192): comm "softirq", pid 0, jiffies 4294938759 (age 109.540s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 b0 5a ed 00 00 00 00 00 00 00 00 ......Z......... backtrace: [] kmem_cache_alloc+0x190/0x208 [] __build_skb+0x30/0x98 [] __netdev_alloc_skb+0xb8/0x104 [] cpsw_rx_handler+0x68/0x1e4 [ti_cpsw] [] __cpdma_chan_free+0xa8/0xc4 [davinci_cpdma] [] __cpdma_chan_process+0x14c/0x16c [davinci_cpdma] [] cpdma_chan_process+0x44/0x5c [davinci_cpdma] [] cpsw_rx_poll+0x1c/0x9c [ti_cpsw] [] net_rx_action+0x1f0/0x2ec [] __do_softirq+0x134/0x258 [] do_softirq+0x68/0x70 [] __local_bh_enable_ip+0xd4/0xe8 [] _raw_spin_unlock_bh+0x30/0x34 [] igmp6_group_added+0x4c/0x1bc [] ipv6_dev_mc_inc+0x398/0x434 [] addrconf_dad_work+0x224/0x39c This happens because CPSW allocates SK buffers and then passes pointers on them in CPDMA where they stored in internal CPPI RAM (SRAM) which belongs to DEV MMIO space. Kmemleak does not scan IO memory and so reports memory leaks. Hence, mark allocated sk buffers as false positive explicitly. Cc: Catalin Marinas Signed-off-by: Grygorii Strashko Acked-by: Catalin Marinas Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index c51f346..f85d605 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -734,6 +734,7 @@ static void cpsw_rx_handler(void *token, int len, int status) netif_receive_skb(skb); ndev->stats.rx_bytes += len; ndev->stats.rx_packets++; + kmemleak_not_leak(new_skb); } else { ndev->stats.rx_dropped++; new_skb = skb; @@ -1325,6 +1326,7 @@ static int cpsw_ndo_open(struct net_device *ndev) kfree_skb(skb); goto err_cleanup; } + kmemleak_not_leak(skb); } /* continue even if we didn't manage to submit all * receive descs -- cgit v0.10.2 From 0d039f337f45c48fb78b80cbf7b706b4de7f07ea Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 9 Aug 2016 21:36:04 +0800 Subject: bonding: fix the typo The message "803.ad" should be "802.3ad". Signed-off-by: Zhu Yanjun Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1f276fa..217e8da 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -152,7 +152,7 @@ module_param(lacp_rate, charp, 0); MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner; " "0 for slow, 1 for fast"); module_param(ad_select, charp, 0); -MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic; " +MODULE_PARM_DESC(ad_select, "802.3ad aggregation selection logic; " "0 for stable (default), 1 for bandwidth, " "2 for count"); module_param(min_links, int, 0); -- cgit v0.10.2 From b9a019899f61acca18df5fb5e38a8fcdfea86fcd Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 28 Jul 2016 19:38:07 +0100 Subject: ARM: 8590/1: sanity_check_meminfo(): avoid overflow on vmalloc_limit To limit the amount of mapped low memory, we determine a physical address boundary based on the start of the vmalloc area using __pa(). Strictly speaking, the vmalloc area location is arbitrary and does not necessarily corresponds to a valid physical address. For example, if PAGE_OFFSET = 0x80000000 PHYS_OFFSET = 0x90000000 vmalloc_min = 0xf0000000 then __pa(vmalloc_min) overflows and returns a wrapped 0 when phys_addr_t is a 32-bit type. Then the code that follows determines that the entire physical memory is above that boundary and no low memory gets mapped at all: |[...] |Machine model: Freescale i.MX51 NA04 Board |Ignoring RAM at 0x90000000-0xb0000000 (!CONFIG_HIGHMEM) |Consider using a HIGHMEM enabled kernel. To avoid this problem let's make vmalloc_limit a 64-bit value all the time and determine that boundary explicitly without using __pa(). Reported-by: Emil Renner Berthing Signed-off-by: Nicolas Pitre Tested-by: Emil Renner Berthing Signed-off-by: Russell King diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 62f4d01..12774c8 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1155,10 +1155,19 @@ void __init sanity_check_meminfo(void) { phys_addr_t memblock_limit = 0; int highmem = 0; - phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1; + u64 vmalloc_limit; struct memblock_region *reg; bool should_use_highmem = false; + /* + * Let's use our own (unoptimized) equivalent of __pa() that is + * not affected by wrap-arounds when sizeof(phys_addr_t) == 4. + * The result is used as the upper bound on physical memory address + * and may itself be outside the valid range for which phys_addr_t + * and therefore __pa() is defined. + */ + vmalloc_limit = (u64)(uintptr_t)vmalloc_min - PAGE_OFFSET + PHYS_OFFSET; + for_each_memblock(memory, reg) { phys_addr_t block_start = reg->base; phys_addr_t block_end = reg->base + reg->size; @@ -1183,10 +1192,11 @@ void __init sanity_check_meminfo(void) if (reg->size > size_limit) { phys_addr_t overlap_size = reg->size - size_limit; - pr_notice("Truncating RAM at %pa-%pa to -%pa", - &block_start, &block_end, &vmalloc_limit); - memblock_remove(vmalloc_limit, overlap_size); + pr_notice("Truncating RAM at %pa-%pa", + &block_start, &block_end); block_end = vmalloc_limit; + pr_cont(" to -%pa", &block_end); + memblock_remove(vmalloc_limit, overlap_size); should_use_highmem = true; } } -- cgit v0.10.2 From 61444cde9170e256c238a02c9a4861930db04f5f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 28 Jul 2016 19:48:44 +0100 Subject: ARM: 8591/1: mm: use fully constructed struct pages for EFI pgd allocations The late_alloc() PTE allocation function used by create_mapping_late() does not call pgtable_page_ctor() on PTE pages it allocates, leaving the per-page spinlock uninitialized. Since generic page table manipulation code may assume that translation table pages that are not owned by init_mm are covered by fully constructed struct pages, the following crash may occur with the new UEFI memory attributes table code. efi: memattr: Processing EFI Memory Attributes table: efi: memattr: 0x0000ffa16000-0x0000ffa82fff [Runtime Code |RUN| | |XP| | | | | | | | ] Unable to handle kernel NULL pointer dereference at virtual address 00000010 pgd = c0204000 [00000010] *pgd=00000000 Internal error: Oops: 5 [#1] SMP ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.7.0-rc4-00063-g3882aa7b340b #361 Hardware name: Generic DT based system task: ed858000 ti: ed842000 task.ti: ed842000 PC is at __lock_acquire+0xa0/0x19a8 ... [] (__lock_acquire) from [] (lock_acquire+0x6c/0x88) [] (lock_acquire) from [] (_raw_spin_lock+0x2c/0x3c) [] (_raw_spin_lock) from [] (apply_to_page_range+0xe8/0x238) [] (apply_to_page_range) from [] (efi_set_mapping_permissions+0x54/0x5c) [] (efi_set_mapping_permissions) from [] (efi_memattr_apply_permissions+0x2b8/0x378) [] (efi_memattr_apply_permissions) from [] (arm_enable_runtime_services+0x1f0/0x22c) [] (arm_enable_runtime_services) from [] (do_one_initcall+0x44/0x174) [] (do_one_initcall) from [] (kernel_init_freeable+0x90/0x1e8) [] (kernel_init_freeable) from [] (kernel_init+0x8/0x114) [] (kernel_init) from [] (ret_from_fork+0x14/0x24) The crash is due to the fact that the UEFI page tables are not owned by init_mm, but are not covered by fully constructed struct pages. Given that the UEFI subsystem is currently the only user of create_mapping_late(), add an unconditional call to pgtable_page_ctor() to late_alloc(). Fixes: 9fc68b717c24 ("ARM/efi: Apply strict permissions for UEFI Runtime Services regions") Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 12774c8..6344913 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -728,7 +728,8 @@ static void *__init late_alloc(unsigned long sz) { void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz)); - BUG_ON(!ptr); + if (!ptr || !pgtable_page_ctor(virt_to_page(ptr))) + BUG(); return ptr; } -- cgit v0.10.2 From 87eed3c74d7c65556f744230a90bf9556dd29146 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 3 Aug 2016 10:33:35 +0100 Subject: ARM: fix address limit restoration for undefined instructions During boot, sometimes the kernel will test to see if an instruction causes an undefined instruction exception. Unfortunately, the exit path for these exceptions did not restore the address limit, which causes the rootfs mount code to fail. Fix the missing address limit restoration. Tested-by: Guenter Roeck Signed-off-by: Russell King diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index bc5f507..9f157e7 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -295,6 +295,7 @@ __und_svc_fault: bl __und_fault __und_svc_finish: + get_thread_info tsk ldr r5, [sp, #S_PSR] @ Get SVC cpsr svc_exit r5 @ return from exception UNWIND(.fnend ) -- cgit v0.10.2 From a96d3b7593a3eefab62dd930e5c99201c3678ee4 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 9 Aug 2016 18:00:08 +0200 Subject: dm9000: Fix irq trigger type setup on non-dt platforms Commit b5a099c67a1c36b "net: ethernet: davicom: fix devicetree irq resource" causes an interrupt storm after the ethernet interface is activated on S3C24XX platform (ARM non-dt), due to the interrupt trigger type not being set properly. It seems, after adding parsing of IRQ flags in commit 7085a7401ba54e92b "drivers: platform: parse IRQ flags from resources", there is no path for non-dt platforms where irq_set_type callback could be invoked when we don't pass the trigger type flags to the request_irq() call. In case of a board where the regression is seen the interrupt trigger type flags are passed through a platform device's resource and it is not currently handled properly without passing the irq trigger type flags to the request_irq() call. In case of OF an of_irq_get() call within platform_get_irq() function seems to be ensuring required irq_chip setup, but there is no equivalent code for non OF/ACPI platforms. This patch mostly restores irq trigger type setting code which has been removed in commit ("net: ethernet: davicom: fix devicetree irq resource"). Fixes: b5a099c67a1c36b913 ("net: ethernet: davicom: fix devicetree irq resource") Signed-off-by: Sylwester Nawrocki Acked-by: Robert Jarzmik Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 1471e16..f45385f 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1299,6 +1299,7 @@ static int dm9000_open(struct net_device *dev) { struct board_info *db = netdev_priv(dev); + unsigned int irq_flags = irq_get_trigger_type(dev->irq); if (netif_msg_ifup(db)) dev_dbg(db->dev, "enabling %s\n", dev->name); @@ -1306,9 +1307,11 @@ dm9000_open(struct net_device *dev) /* If there is no IRQ type specified, tell the user that this is a * problem */ - if (irq_get_trigger_type(dev->irq) == IRQF_TRIGGER_NONE) + if (irq_flags == IRQF_TRIGGER_NONE) dev_warn(db->dev, "WARNING: no IRQ resource flags set.\n"); + irq_flags |= IRQF_SHARED; + /* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */ iow(db, DM9000_GPR, 0); /* REG_1F bit0 activate phyxcer */ mdelay(1); /* delay needs by DM9000B */ @@ -1316,8 +1319,7 @@ dm9000_open(struct net_device *dev) /* Initialize DM9000 board */ dm9000_init_dm9000(dev); - if (request_irq(dev->irq, dm9000_interrupt, IRQF_SHARED, - dev->name, dev)) + if (request_irq(dev->irq, dm9000_interrupt, irq_flags, dev->name, dev)) return -EAGAIN; /* Now that we have an interrupt handler hooked up we can unmask * our interrupts -- cgit v0.10.2 From 836384d2501dee87b1c437f3e268871980c857bf Mon Sep 17 00:00:00 2001 From: Wenyou Yang Date: Fri, 5 Aug 2016 14:35:41 +0800 Subject: net: phy: micrel: Add specific suspend Disable all interrupts when suspend, they will be enabled when resume. Otherwise, the suspend/resume process will be blocked occasionally. Signed-off-by: Wenyou Yang Acked-by: Nicolas Ferre Signed-off-by: David S. Miller diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 1882d98..053e879 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -677,17 +677,28 @@ static void kszphy_get_stats(struct phy_device *phydev, data[i] = kszphy_get_stat(phydev, i); } -static int kszphy_resume(struct phy_device *phydev) +static int kszphy_suspend(struct phy_device *phydev) { - int value; + /* Disable PHY Interrupts */ + if (phy_interrupt_is_valid(phydev)) { + phydev->interrupts = PHY_INTERRUPT_DISABLED; + if (phydev->drv->config_intr) + phydev->drv->config_intr(phydev); + } - mutex_lock(&phydev->lock); + return genphy_suspend(phydev); +} - value = phy_read(phydev, MII_BMCR); - phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN); +static int kszphy_resume(struct phy_device *phydev) +{ + genphy_resume(phydev); - kszphy_config_intr(phydev); - mutex_unlock(&phydev->lock); + /* Enable PHY Interrupts */ + if (phy_interrupt_is_valid(phydev)) { + phydev->interrupts = PHY_INTERRUPT_ENABLED; + if (phydev->drv->config_intr) + phydev->drv->config_intr(phydev); + } return 0; } @@ -900,7 +911,7 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, - .suspend = genphy_suspend, + .suspend = kszphy_suspend, .resume = kszphy_resume, }, { .phy_id = PHY_ID_KSZ8061, -- cgit v0.10.2 From 06f4e94898918bcad00cdd4d349313a439d6911e Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Tue, 9 Aug 2016 11:25:01 +0800 Subject: cpuset: make sure new tasks conform to the current config of the cpuset A new task inherits cpus_allowed and mems_allowed masks from its parent, but if someone changes cpuset's config by writing to cpuset.cpus/cpuset.mems before this new task is inserted into the cgroup's task list, the new task won't be updated accordingly. Signed-off-by: Zefan Li Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c7fd277..c27e533 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2069,6 +2069,20 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) mutex_unlock(&cpuset_mutex); } +/* + * Make sure the new task conform to the current state of its parent, + * which could have been changed by cpuset just after it inherits the + * state from the parent and before it sits on the cgroup's task list. + */ +void cpuset_fork(struct task_struct *task) +{ + if (task_css_is_root(task, cpuset_cgrp_id)) + return; + + set_cpus_allowed_ptr(task, ¤t->cpus_allowed); + task->mems_allowed = current->mems_allowed; +} + struct cgroup_subsys cpuset_cgrp_subsys = { .css_alloc = cpuset_css_alloc, .css_online = cpuset_css_online, @@ -2079,6 +2093,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .attach = cpuset_attach, .post_attach = cpuset_post_attach, .bind = cpuset_bind, + .fork = cpuset_fork, .legacy_cftypes = files, .early_init = true, }; -- cgit v0.10.2 From 7bb90c3715a496c650b2e879225030f9dd9cfafb Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 4 Aug 2016 11:11:19 +0900 Subject: bridge: Fix problems around fdb entries pointing to the bridge device Adding fdb entries pointing to the bridge device uses fdb_insert(), which lacks various checks and does not respect added_by_user flag. As a result, some inconsistent behavior can happen: * Adding temporary entries succeeds but results in permanent entries. * Same goes for "dynamic" and "use". * Changing mac address of the bridge device causes deletion of user-added entries. * Replacing existing entries looks successful from userspace but actually not, regardless of NLM_F_EXCL flag. Use the same logic as other entries and fix them. Fixes: 3741873b4f73 ("bridge: allow adding of fdb entries pointing to the bridge device") Signed-off-by: Toshiaki Makita Acked-by: Roopa Prabhu Signed-off-by: David S. Miller diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index c18080a..cd620fa 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -267,7 +267,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) /* If old entry was unassociated with any port, then delete it. */ f = __br_fdb_get(br, br->dev->dev_addr, 0); - if (f && f->is_local && !f->dst) + if (f && f->is_local && !f->dst && !f->added_by_user) fdb_delete_local(br, NULL, f); fdb_insert(br, NULL, newaddr, 0); @@ -282,7 +282,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) if (!br_vlan_should_use(v)) continue; f = __br_fdb_get(br, br->dev->dev_addr, v->vid); - if (f && f->is_local && !f->dst) + if (f && f->is_local && !f->dst && !f->added_by_user) fdb_delete_local(br, NULL, f); fdb_insert(br, NULL, newaddr, v->vid); } @@ -764,20 +764,25 @@ out: } /* Update (create or replace) forwarding database entry */ -static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, - __u16 state, __u16 flags, __u16 vid) +static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, + const __u8 *addr, __u16 state, __u16 flags, __u16 vid) { - struct net_bridge *br = source->br; struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; bool modified = false; /* If the port cannot learn allow only local and static entries */ - if (!(state & NUD_PERMANENT) && !(state & NUD_NOARP) && + if (source && !(state & NUD_PERMANENT) && !(state & NUD_NOARP) && !(source->state == BR_STATE_LEARNING || source->state == BR_STATE_FORWARDING)) return -EPERM; + if (!source && !(state & NUD_PERMANENT)) { + pr_info("bridge: RTM_NEWNEIGH %s without NUD_PERMANENT\n", + br->dev->name); + return -EINVAL; + } + fdb = fdb_find(head, addr, vid); if (fdb == NULL) { if (!(flags & NLM_F_CREATE)) @@ -832,22 +837,28 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, return 0; } -static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p, - const unsigned char *addr, u16 nlh_flags, u16 vid) +static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, + struct net_bridge_port *p, const unsigned char *addr, + u16 nlh_flags, u16 vid) { int err = 0; if (ndm->ndm_flags & NTF_USE) { + if (!p) { + pr_info("bridge: RTM_NEWNEIGH %s with NTF_USE is not supported\n", + br->dev->name); + return -EINVAL; + } local_bh_disable(); rcu_read_lock(); - br_fdb_update(p->br, p, addr, vid, true); + br_fdb_update(br, p, addr, vid, true); rcu_read_unlock(); local_bh_enable(); } else { - spin_lock_bh(&p->br->hash_lock); - err = fdb_add_entry(p, addr, ndm->ndm_state, + spin_lock_bh(&br->hash_lock); + err = fdb_add_entry(br, p, addr, ndm->ndm_state, nlh_flags, vid); - spin_unlock_bh(&p->br->hash_lock); + spin_unlock_bh(&br->hash_lock); } return err; @@ -884,6 +895,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], dev->name); return -EINVAL; } + br = p->br; vg = nbp_vlan_group(p); } @@ -895,15 +907,9 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } /* VID was specified, so use it. */ - if (dev->priv_flags & IFF_EBRIDGE) - err = br_fdb_insert(br, NULL, addr, vid); - else - err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid); } else { - if (dev->priv_flags & IFF_EBRIDGE) - err = br_fdb_insert(br, NULL, addr, 0); - else - err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0); if (err || !vg || !vg->num_vlans) goto out; @@ -914,11 +920,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], list_for_each_entry(v, &vg->vlan_list, vlist) { if (!br_vlan_should_use(v)) continue; - if (dev->priv_flags & IFF_EBRIDGE) - err = br_fdb_insert(br, NULL, addr, v->vid); - else - err = __br_fdb_add(ndm, p, addr, nlh_flags, - v->vid); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid); if (err) goto out; } -- cgit v0.10.2 From 9ba333dc55cbb9523553df973adb3024d223e905 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Mon, 8 Aug 2016 14:08:17 +0200 Subject: s390/dasd: fix hanging device after clear subchannel When a device is in a status where CIO has killed all I/O by itself the interrupt for a clear request may not contain an irb to determine the clear function. Instead it contains an error pointer -EIO. This was ignored by the DASD int_handler leading to a hanging device waiting for a clear interrupt. Handle -EIO error pointer correctly for requests that are clear pending and treat the clear as successful. Signed-off-by: Stefan Haberland Reviewed-by: Sebastian Ott Cc: stable@vger.kernel.org Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 8973d34..fb1b56a 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1643,9 +1643,18 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, u8 *sense = NULL; int expires; + cqr = (struct dasd_ccw_req *) intparm; if (IS_ERR(irb)) { switch (PTR_ERR(irb)) { case -EIO: + if (cqr && cqr->status == DASD_CQR_CLEAR_PENDING) { + device = (struct dasd_device *) cqr->startdev; + cqr->status = DASD_CQR_CLEARED; + dasd_device_clear_timer(device); + wake_up(&dasd_flush_wq); + dasd_schedule_device_bh(device); + return; + } break; case -ETIMEDOUT: DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: " @@ -1661,7 +1670,6 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, } now = get_tod_clock(); - cqr = (struct dasd_ccw_req *) intparm; /* check for conditions that should be handled immediately */ if (!cqr || !(scsw_dstat(&irb->scsw) == (DEV_STAT_CHN_END | DEV_STAT_DEV_END) && -- cgit v0.10.2 From 4d81aaa53c2dea220ddf88e19c33033d6cf4f8cb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 9 Aug 2016 12:26:28 +0200 Subject: s390/pageattr: handle numpages parameter correctly Both set_memory_ro() and set_memory_rw() will modify the page attributes of at least one page, even if the numpages parameter is zero. The author expected that calling these functions with numpages == zero would never happen. However with the new 444d13ff10fb ("modules: add ro_after_init support") feature this happens frequently. Therefore do the right thing and make these two functions return gracefully if nothing should be done. Fixes crashes on module load like this one: Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 000003ff80008000 TEID: 000003ff80008407 Fault in home space mode while using kernel ASCE. AS:0000000000d18007 R3:00000001e6aa4007 S:00000001e6a10800 P:00000001e34ee21d Oops: 0004 ilc:3 [#1] SMP Modules linked in: x_tables CPU: 10 PID: 1 Comm: systemd Not tainted 4.7.0-11895-g3fa9045 #4 Hardware name: IBM 2964 N96 703 (LPAR) task: 00000001e9118000 task.stack: 00000001e9120000 Krnl PSW : 0704e00180000000 00000000005677f8 (rb_erase+0xf0/0x4d0) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 Krnl GPRS: 000003ff80008b20 000003ff80008b20 000003ff80008b70 0000000000b9d608 000003ff80008b20 0000000000000000 00000001e9123e88 000003ff80008950 00000001e485ab40 000003ff00000000 000003ff80008b00 00000001e4858480 0000000100000000 000003ff80008b68 00000000001d5998 00000001e9123c28 Krnl Code: 00000000005677e8: ec1801c3007c cgij %r1,0,8,567b6e 00000000005677ee: e32010100020 cg %r2,16(%r1) #00000000005677f4: a78401c2 brc 8,567b78 >00000000005677f8: e35010080024 stg %r5,8(%r1) 00000000005677fe: ec5801af007c cgij %r5,0,8,567b5c 0000000000567804: e30050000024 stg %r0,0(%r5) 000000000056780a: ebacf0680004 lmg %r10,%r12,104(%r15) 0000000000567810: 07fe bcr 15,%r14 Call Trace: ([<000003ff80008900>] __this_module+0x0/0xffffffffffffd700 [x_tables]) ([<0000000000264fd4>] do_init_module+0x12c/0x220) ([<00000000001da14a>] load_module+0x24e2/0x2b10) ([<00000000001da976>] SyS_finit_module+0xbe/0xd8) ([<0000000000803b26>] system_call+0xd6/0x264) Last Breaking-Event-Address: [<000000000056771a>] rb_erase+0x12/0x4d0 Kernel panic - not syncing: Fatal exception: panic_on_oops Reported-by: Christian Borntraeger Reported-and-tested-by: Sebastian Ott Fixes: e8a97e42dc98 ("s390/pageattr: allow kernel page table splitting") Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 7104ffb..af7cf28c 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -252,6 +252,8 @@ static int change_page_attr(unsigned long addr, unsigned long end, int rc = -EINVAL; pgd_t *pgdp; + if (addr == end) + return 0; if (end >= MODULES_END) return -EINVAL; mutex_lock(&cpa_mutex); -- cgit v0.10.2 From 99e5e886a0a59df267ff6838f763b789847df982 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 1 Aug 2016 20:25:33 +0200 Subject: KVM: arm64: vgic-its: Handle errors from vgic_add_lpi During low memory conditions, we could be dereferencing a NULL pointer when vgic_add_lpi fails to allocate memory. Consider for example this call sequence: vgic_its_cmd_handle_mapi itte->irq = vgic_add_lpi(kvm, lpi_nr); update_lpi_config(kvm, itte->irq, NULL); ret = kvm_read_guest(kvm, propbase + irq->intid ^^^^ kaboom? Instead, return an error pointer from vgic_add_lpi and check the return value from its single caller. Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 1bd8adb..d06330a 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -51,7 +51,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid) irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL); if (!irq) - return NULL; + return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&irq->lpi_list); INIT_LIST_HEAD(&irq->ap_list); @@ -522,7 +522,8 @@ static void its_free_itte(struct kvm *kvm, struct its_itte *itte) list_del(&itte->itte_list); /* This put matches the get in vgic_add_lpi. */ - vgic_put_irq(kvm, itte->irq); + if (itte->irq) + vgic_put_irq(kvm, itte->irq); kfree(itte); } @@ -713,10 +714,11 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, u32 device_id = its_cmd_get_deviceid(its_cmd); u32 event_id = its_cmd_get_id(its_cmd); u32 coll_id = its_cmd_get_collection(its_cmd); - struct its_itte *itte; + struct its_itte *itte, *new_itte = NULL; struct its_device *device; struct its_collection *collection, *new_coll = NULL; int lpi_nr; + struct vgic_irq *irq; device = find_its_device(its, device_id); if (!device) @@ -747,13 +749,24 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, return -ENOMEM; } + new_itte = itte; itte->event_id = event_id; list_add_tail(&itte->itte_list, &device->itt_head); } itte->collection = collection; itte->lpi = lpi_nr; - itte->irq = vgic_add_lpi(kvm, lpi_nr); + + irq = vgic_add_lpi(kvm, lpi_nr); + if (IS_ERR(irq)) { + if (new_coll) + vgic_its_free_collection(its, coll_id); + if (new_itte) + its_free_itte(kvm, new_itte); + return PTR_ERR(irq); + } + itte->irq = irq; + update_affinity_itte(kvm, itte); /* -- cgit v0.10.2 From 2cccbb368a2bf27d98cf36bb424fbbf5572c0fab Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 2 Aug 2016 22:05:42 +0200 Subject: KVM: arm64: vgic-its: Plug race in vgic_put_irq Right now the following sequence of events can happen: 1. Thread X calls vgic_put_irq 2. Thread Y calls vgic_add_lpi 3. Thread Y gets lpi_list_lock 4. Thread X drops the ref count to 0 and blocks on lpi_list_lock 5. Thread Y finds the irq via the lpi_list_lock, raises the ref count to 1, and release the lpi_list_lock. 6. Thread X proceeds and frees the irq. Avoid this by holding the spinlock around the kref_put. Reviewed-by: Andre Przywara Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index e7aeac7..e83b7fe 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -117,17 +117,17 @@ static void vgic_irq_release(struct kref *ref) void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) { - struct vgic_dist *dist; + struct vgic_dist *dist = &kvm->arch.vgic; if (irq->intid < VGIC_MIN_LPI) return; - if (!kref_put(&irq->refcount, vgic_irq_release)) + spin_lock(&dist->lpi_list_lock); + if (!kref_put(&irq->refcount, vgic_irq_release)) { + spin_unlock(&dist->lpi_list_lock); return; + }; - dist = &kvm->arch.vgic; - - spin_lock(&dist->lpi_list_lock); list_del(&irq->lpi_list); dist->lpi_list_count--; spin_unlock(&dist->lpi_list_lock); -- cgit v0.10.2 From 4da449ae1df9cfeb167e78f250b250eff64bc65e Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Tue, 9 Aug 2016 20:46:16 +0200 Subject: netfilter: nft_exthdr: Add size check on u8 nft_exthdr attributes Fix the direct assignment of offset and length attributes included in nft_exthdr structure from u32 data to u8. Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index ba7aed1..82c264e 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -59,6 +59,7 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); + u32 offset, len; if (tb[NFTA_EXTHDR_DREG] == NULL || tb[NFTA_EXTHDR_TYPE] == NULL || @@ -66,9 +67,15 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, tb[NFTA_EXTHDR_LEN] == NULL) return -EINVAL; + offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET])); + len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN])); + + if (offset > U8_MAX || len > U8_MAX) + return -ERANGE; + priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); - priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET])); - priv->len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN])); + priv->offset = offset; + priv->len = len; priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]); return nft_validate_register_store(ctx, priv->dreg, NULL, -- cgit v0.10.2 From c7de573471832dff7d31f0c13b0f143d6f017799 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 29 Jul 2016 03:17:58 +0300 Subject: usb: dwc3: gadget: increment request->actual once When using SG lists, we would end up setting request->actual to: num_mapped_sgs * (request->length - count) Let's fix that up by incrementing request->actual only once. Cc: Reported-by: Brian E Rogers Signed-off-by: Felipe Balbi diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 8f8c215..863c306 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2013,14 +2013,6 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, s_pkt = 1; } - /* - * We assume here we will always receive the entire data block - * which we should receive. Meaning, if we program RX to - * receive 4K but we receive only 2K, we assume that's all we - * should receive and we simply bounce the request back to the - * gadget driver for further processing. - */ - req->request.actual += req->request.length - count; if (s_pkt) return 1; if ((event->status & DEPEVT_STATUS_LST) && @@ -2040,6 +2032,7 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep, struct dwc3_trb *trb; unsigned int slot; unsigned int i; + int count = 0; int ret; do { @@ -2054,6 +2047,8 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep, slot++; slot %= DWC3_TRB_NUM; trb = &dep->trb_pool[slot]; + count += trb->size & DWC3_TRB_SIZE_MASK; + ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb, event, status); @@ -2061,6 +2056,14 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep, break; } while (++i < req->request.num_mapped_sgs); + /* + * We assume here we will always receive the entire data block + * which we should receive. Meaning, if we program RX to + * receive 4K but we receive only 2K, we assume that's all we + * should receive and we simply bounce the request back to the + * gadget driver for further processing. + */ + req->request.actual += req->request.length - count; dwc3_gadget_giveback(dep, req, status); if (ret) -- cgit v0.10.2 From e5b36ae2f851024d43c76e51f395d32ce8d769ce Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 10 Aug 2016 11:13:26 +0300 Subject: usb: dwc3: gadget: fix for short pkts during chained xfers DWC3 has one interesting peculiarity with chained transfers. If we setup N chained transfers and we get a short packet before processing all N TRBs, DWC3 will (conditionally) issue a XferComplete or XferInProgress event and retire all TRBs from the one which got a short packet to the last without clearing their HWO bits. This means SW must clear HWO bit manually, which this patch is doing. Cc: Cc: Brian E Rogers Signed-off-by: Felipe Balbi diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 863c306..241f5c7 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1955,7 +1955,8 @@ static void dwc3_gadget_free_endpoints(struct dwc3 *dwc) static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, struct dwc3_request *req, struct dwc3_trb *trb, - const struct dwc3_event_depevt *event, int status) + const struct dwc3_event_depevt *event, int status, + int chain) { unsigned int count; unsigned int s_pkt = 0; @@ -1964,6 +1965,19 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, dep->queued_requests--; trace_dwc3_complete_trb(dep, trb); + /* + * If we're in the middle of series of chained TRBs and we + * receive a short transfer along the way, DWC3 will skip + * through all TRBs including the last TRB in the chain (the + * where CHN bit is zero. DWC3 will also avoid clearing HWO + * bit and SW has to do it manually. + * + * We're going to do that here to avoid problems of HW trying + * to use bogus TRBs for transfers. + */ + if (chain && (trb->ctrl & DWC3_TRB_CTRL_HWO)) + trb->ctrl &= ~DWC3_TRB_CTRL_HWO; + if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN) /* * We continue despite the error. There is not much we @@ -1975,6 +1989,7 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, */ dev_err(dwc->dev, "%s's TRB (%p) still owned by HW\n", dep->name, trb); + count = trb->size & DWC3_TRB_SIZE_MASK; if (dep->direction) { @@ -2036,10 +2051,13 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep, int ret; do { + int chain; + req = next_request(&dep->started_list); if (WARN_ON_ONCE(!req)) return 1; + chain = req->request.num_mapped_sgs > 0; i = 0; do { slot = req->first_trb_index + i; @@ -2049,9 +2067,8 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep, trb = &dep->trb_pool[slot]; count += trb->size & DWC3_TRB_SIZE_MASK; - ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb, - event, status); + event, status, chain); if (ret) break; } while (++i < req->request.num_mapped_sgs); -- cgit v0.10.2 From 7c705dfe2ebe731c8fd068623b6b4df2d3512c08 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 10 Aug 2016 12:35:30 +0300 Subject: usb: dwc3: gadget: always cleanup all TRBs If we stop earlier due to short packet, we will not be able to giveback all TRBs. Cc: Cc: Brian E Rogers Signed-off-by: Felipe Balbi diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 241f5c7..eb820e4 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2028,7 +2028,7 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, s_pkt = 1; } - if (s_pkt) + if (s_pkt && !chain) return 1; if ((event->status & DEPEVT_STATUS_LST) && (trb->ctrl & (DWC3_TRB_CTRL_LST | -- cgit v0.10.2 From 4491ed5042f0419b22a4b08331adb54af31e2caa Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 1 Apr 2016 17:13:11 +0300 Subject: usb: dwc3: pci: add Intel Kabylake PCI ID Intel Kabylake PCH has the same DWC3 than Intel Sunrisepoint. Add the new ID to the supported devices. Cc: Signed-off-by: Heikki Krogerus Signed-off-by: Felipe Balbi diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 45f5a23..2eb84d6 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -37,6 +37,7 @@ #define PCI_DEVICE_ID_INTEL_BXT 0x0aaa #define PCI_DEVICE_ID_INTEL_BXT_M 0x1aaa #define PCI_DEVICE_ID_INTEL_APL 0x5aaa +#define PCI_DEVICE_ID_INTEL_KBP 0xa2b0 static const struct acpi_gpio_params reset_gpios = { 0, 0, false }; static const struct acpi_gpio_params cs_gpios = { 1, 0, false }; @@ -227,6 +228,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT_M), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBP), }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), }, { } /* Terminating Entry */ }; -- cgit v0.10.2 From 6f4deb18a505523eb7925d646574a95f9e982ff7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 8 Aug 2016 15:58:56 +0200 Subject: gpio: max730x: set gpiochip data pointer before using it gpiochip_add_data() has to be called before calling max7301_direction_input() [ 4.389883] Unable to handle kernel paging request for data at address 0x00000018 [ 4.397282] Faulting instruction address: 0xc01a8cbc [ 4.402023] Oops: Kernel access of bad area, sig: 11 [#1] [ 4.407331] PREEMPT CMPC885 [ 4.410131] CPU: 0 PID: 6 Comm: kworker/u2:0 Not tainted 4.5.0-gacdfdee #39 [ 4.418592] Workqueue: deferwq deferred_probe_work_func [ 4.423711] task: c60798b0 ti: c608a000 task.ti: c608a000 [ 4.429038] NIP: c01a8cbc LR: c01a8e24 CTR: c01ff028 [ 4.433953] REGS: c608bad0 TRAP: 0300 Not tainted (4.5.0-s3k-dev-gacdfdee-svn-dirty) [ 4.441847] MSR: 00009032 CR: 33039553 XER: a000f940 [ 4.448395] DAR: 00000018 DSISR: c0000000 GPR00: c01a8e24 c608bb80 c60798b0 c60d6f6c 00000004 00000002 07de2900 00700000 GPR08: 00000000 00000000 c608a000 00001032 35039553 00000000 c002f37c c6010b64 GPR16: c6010a48 c6010a14 c6010a00 00000000 c0450000 c0453568 c0453438 c050db14 GPR24: c62662bc 00000009 ffffffaa c60d6f5d 00000001 00000000 00000000 00000000 [ 4.480371] NIP [c01a8cbc] max7301_direction_input+0x20/0x9c [ 4.485951] LR [c01a8e24] __max730x_probe+0xec/0x138 [ 4.490812] Call Trace: [ 4.493268] [c608bba0] [c01a8e24] __max730x_probe+0xec/0x138 [ 4.498878] [c608bbc0] [c01cc368] driver_probe_device+0x190/0x38c [ 4.504895] [c608bbf0] [c01ca918] bus_for_each_drv+0x58/0xb4 [ 4.510489] [c608bc20] [c01cc04c] __device_attach+0x8c/0x110 [ 4.516082] [c608bc50] [c01cab80] bus_probe_device+0x34/0xb8 [ 4.521673] [c608bc70] [c01c96c8] device_add+0x3c0/0x598 [ 4.526925] [c608bcb0] [c0200f90] spi_add_device+0x114/0x160 [ 4.532512] [c608bcd0] [c02018d0] spi_register_master+0x6e0/0x7c8 [ 4.538537] [c608bd20] [c02019fc] devm_spi_register_master+0x44/0x8c [ 4.544824] [c608bd40] [c0203854] of_fsl_spi_probe+0x458/0x57c [ 4.550587] [c608bda0] [c01cd828] platform_drv_probe+0x30/0x74 [ 4.556366] [c608bdb0] [c01cc368] driver_probe_device+0x190/0x38c [ 4.562383] [c608bde0] [c01ca918] bus_for_each_drv+0x58/0xb4 [ 4.567977] [c608be10] [c01cc04c] __device_attach+0x8c/0x110 [ 4.573572] [c608be40] [c01cab80] bus_probe_device+0x34/0xb8 [ 4.579170] [c608be60] [c01cb9b4] deferred_probe_work_func+0xa4/0xc4 [ 4.585438] [c608be80] [c0029c04] process_one_work+0x22c/0x414 [ 4.591201] [c608bea0] [c002a100] worker_thread+0x314/0x5c0 [ 4.596722] [c608bef0] [c002f444] kthread+0xc8/0xcc [ 4.601538] [c608bf40] [c000af84] ret_from_kernel_thread+0x5c/0x64 [ 4.607596] Instruction dump: [ 4.610530] 7c0803a6 bba10014 38210020 4e800020 7c0802a6 9421ffe0 38840004 bf810010 [ 4.618188] 90010024 549cf0be 83c30010 549d0f7c <813e0018> 7fc3f378 7d3f2430 57ff07fe [ 4.626041] ---[ end trace 303adb021dd4caf2 ]--- Cc: stable@vger.kernel.org fixes: 5e45e01916197 ("gpio: max730x: use gpiochip data pointer") Signed-off-by: Christophe Leroy Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-max730x.c b/drivers/gpio/gpio-max730x.c index 0880736..946d091 100644 --- a/drivers/gpio/gpio-max730x.c +++ b/drivers/gpio/gpio-max730x.c @@ -192,6 +192,10 @@ int __max730x_probe(struct max7301 *ts) ts->chip.parent = dev; ts->chip.owner = THIS_MODULE; + ret = gpiochip_add_data(&ts->chip, ts); + if (ret) + goto exit_destroy; + /* * initialize pullups according to platform data and cache the * register values for later use. @@ -213,10 +217,6 @@ int __max730x_probe(struct max7301 *ts) } } - ret = gpiochip_add_data(&ts->chip, ts); - if (ret) - goto exit_destroy; - return ret; exit_destroy: -- cgit v0.10.2 From 5b236d0fde21d88351420ef0b9a6cb7aeeea0c54 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 26 Jul 2016 14:51:58 +0000 Subject: pinctrl: meson: Drop pinctrl_unregister for devm_ registered device It's not necessary to unregister pin controller device registered with devm_pinctrl_register() and using pinctrl_unregister() leads to a double free. This is detected by Coccinelle semantic patch. Fixes: e649f7ec8c5f ("pinctrl: meson: Use devm_pinctrl_register() for pinctrl registration") Signed-off-by: Wei Yongjun Reviewed-by: Dmitry Torokhov Acked-by: Kevin Hilman Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 11623c6..44e69c9 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -727,13 +727,7 @@ static int meson_pinctrl_probe(struct platform_device *pdev) return PTR_ERR(pc->pcdev); } - ret = meson_gpiolib_register(pc); - if (ret) { - pinctrl_unregister(pc->pcdev); - return ret; - } - - return 0; + return meson_gpiolib_register(pc); } static struct platform_driver meson_pinctrl_driver = { -- cgit v0.10.2 From b120a3c286520ca465c54e8afa442be10560053b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 26 Jul 2016 14:52:57 +0000 Subject: pinctrl: pistachio: Drop pinctrl_unregister for devm_ registered device It's not necessary to unregister pin controller device registered with devm_pinctrl_register() and using pinctrl_unregister() leads to a double free. This is detected by Coccinelle semantic patch. Signed-off-by: Wei Yongjun Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c index c6d410e..7bad200 100644 --- a/drivers/pinctrl/pinctrl-pistachio.c +++ b/drivers/pinctrl/pinctrl-pistachio.c @@ -1432,7 +1432,6 @@ static int pistachio_pinctrl_probe(struct platform_device *pdev) { struct pistachio_pinctrl *pctl; struct resource *res; - int ret; pctl = devm_kzalloc(&pdev->dev, sizeof(*pctl), GFP_KERNEL); if (!pctl) @@ -1464,13 +1463,7 @@ static int pistachio_pinctrl_probe(struct platform_device *pdev) return PTR_ERR(pctl->pctldev); } - ret = pistachio_gpio_register(pctl); - if (ret < 0) { - pinctrl_unregister(pctl->pctldev); - return ret; - } - - return 0; + return pistachio_gpio_register(pctl); } static struct platform_driver pistachio_pinctrl_driver = { -- cgit v0.10.2 From 8cf4345575a416e6856a6856ac6eaa31ad883126 Mon Sep 17 00:00:00 2001 From: "Agrawal, Nitesh-kumar" Date: Tue, 26 Jul 2016 08:28:19 +0000 Subject: pinctrl/amd: Remove the default de-bounce time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the function amd_gpio_irq_enable() and amd_gpio_direction_input(), remove the code which is setting the default de-bounce time to 2.75ms. The driver code shall use the same settings as specified in BIOS. Any default assignment impacts TouchPad behaviour when the LevelTrig is set to EDGE FALLING. Cc: stable@vger.kernel.org Reviewed-by:  Ken Xue Signed-off-by: Nitesh Kumar Agrawal Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 634b4d3..b3e7723 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -43,17 +43,6 @@ static int amd_gpio_direction_input(struct gpio_chip *gc, unsigned offset) spin_lock_irqsave(&gpio_dev->lock, flags); pin_reg = readl(gpio_dev->base + offset * 4); - /* - * Suppose BIOS or Bootloader sets specific debounce for the - * GPIO. if not, set debounce to be 2.75ms and remove glitch. - */ - if ((pin_reg & DB_TMR_OUT_MASK) == 0) { - pin_reg |= 0xf; - pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF); - pin_reg |= DB_TYPE_REMOVE_GLITCH << DB_CNTRL_OFF; - pin_reg &= ~BIT(DB_TMR_LARGE_OFF); - } - pin_reg &= ~BIT(OUTPUT_ENABLE_OFF); writel(pin_reg, gpio_dev->base + offset * 4); spin_unlock_irqrestore(&gpio_dev->lock, flags); @@ -326,15 +315,6 @@ static void amd_gpio_irq_enable(struct irq_data *d) spin_lock_irqsave(&gpio_dev->lock, flags); pin_reg = readl(gpio_dev->base + (d->hwirq)*4); - /* - Suppose BIOS or Bootloader sets specific debounce for the - GPIO. if not, set debounce to be 2.75ms. - */ - if ((pin_reg & DB_TMR_OUT_MASK) == 0) { - pin_reg |= 0xf; - pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF); - pin_reg &= ~BIT(DB_TMR_LARGE_OFF); - } pin_reg |= BIT(INTERRUPT_ENABLE_OFF); pin_reg |= BIT(INTERRUPT_MASK_OFF); writel(pin_reg, gpio_dev->base + (d->hwirq)*4); -- cgit v0.10.2 From e95d0dfb229fffe96dc4c29054f6c7a7302e111e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 2 Aug 2016 18:18:33 +0300 Subject: pinctrl: intel: merrifield: Add missed header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On x86 builds the absense of makes static analyzer and compiler unhappy which fails to build the driver. CHECK drivers/pinctrl/intel/pinctrl-merrifield.c drivers/pinctrl/intel/pinctrl-merrifield.c:518:17: error: undefined identifier 'readl' drivers/pinctrl/intel/pinctrl-merrifield.c:570:17: error: undefined identifier 'readl' drivers/pinctrl/intel/pinctrl-merrifield.c:575:9: error: undefined identifier 'writel' drivers/pinctrl/intel/pinctrl-merrifield.c:645:17: error: undefined identifier 'readl' CC drivers/pinctrl/intel/pinctrl-merrifield.o drivers/pinctrl/intel/pinctrl-merrifield.c: In function ‘mrfld_pin_dbg_show’: drivers/pinctrl/intel/pinctrl-merrifield.c:518:10: error: implicit declaration of function ‘readl’ [-Werror=implicit-function-declaration] value = readl(bufcfg); ^ drivers/pinctrl/intel/pinctrl-merrifield.c: In function ‘mrfld_update_bufcfg’: drivers/pinctrl/intel/pinctrl-merrifield.c:575:2: error: implicit declaration of function ‘writel’ [-Werror=implicit-function-declaration] writel(value, bufcfg); ^ cc1: some warnings being treated as errors Add header to the top of the module. Fixes: 4e80c8f50574 ("pinctrl: intel: Add Intel Merrifield pin controller support") Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c index eb4990f..7fb7656 100644 --- a/drivers/pinctrl/intel/pinctrl-merrifield.c +++ b/drivers/pinctrl/intel/pinctrl-merrifield.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include -- cgit v0.10.2 From b06bc7ec4da26dccda040a65896ea22ee6638a7a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Jul 2016 09:10:31 +0100 Subject: drm/i915: Flush GT idle status upon reset Upon resetting the GPU, we force the engines to be idle by clearing their request lists. However, I neglected to clear the GT active status and so the next request following the reset was not marking the device as busy again. (We had to wait until any outstanding retire worker finally ran and cleared the active status.) Fixes: 67d97da34917 ("drm/i915: Only start retire worker when idle") Testcase: igt/pm_rps/reset Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1468397438-21226-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit b913b33c43db849778f044d4b9e74b167898a9bc) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1168150..27ef10e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3169,6 +3169,8 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) } intel_ring_init_seqno(engine, engine->last_submitted_seqno); + + engine->i915->gt.active_engines &= ~intel_engine_flag(engine); } void i915_gem_reset(struct drm_device *dev) @@ -3186,6 +3188,7 @@ void i915_gem_reset(struct drm_device *dev) for_each_engine(engine, dev_priv) i915_gem_reset_engine_cleanup(engine); + mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); i915_gem_context_reset(dev); -- cgit v0.10.2 From c2a4c5b75a717db8bbe1c2fad6473063a7d2f930 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Wed, 3 Aug 2016 13:39:42 -0500 Subject: drm/amdgpu: Fix memory trashing if UVD ring test fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fence_put was called on an uninitialized variable. Reviewed-by: Christian König Signed-off-by: Jay Cornwall Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index b11f4e8..4aa993d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1187,7 +1187,8 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = 0; } -error: fence_put(fence); + +error: return r; } -- cgit v0.10.2 From 18b43e89d295cc65151c505c643c98fb2c320e59 Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Thu, 4 Aug 2016 17:56:53 -0700 Subject: ARC: Call trace_hardirqs_on() before enabling irqs trace_hardirqs_on_caller() in lockdep.c expects to be called before, not after interrupts are actually enabled. The following comment in kernel/locking/lockdep.c substantiates this claim: " /* * We're enabling irqs and according to our state above irqs weren't * already enabled, yet we find the hardware thinks they are in fact * enabled.. someone messed up their IRQ state tracing. */ " An example can be found in include/linux/irqflags.h: do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) Without this change, we hit the following DEBUG_LOCKS_WARN_ON. [ 7.760000] ------------[ cut here ]------------ [ 7.760000] WARNING: CPU: 0 PID: 1 at kernel/locking/lockdep.c:2711 resume_user_mode_begin+0x48/0xf0 [ 7.770000] DEBUG_LOCKS_WARN_ON(!irqs_disabled()) [ 7.780000] Modules linked in: [ 7.780000] CPU: 0 PID: 1 Comm: init Not tainted 4.7.0-00003-gc668bb9-dirty #366 [ 7.790000] [ 7.790000] Stack Trace: [ 7.790000] arc_unwind_core.constprop.1+0xa4/0x118 [ 7.800000] warn_slowpath_fmt+0x72/0x158 [ 7.800000] resume_user_mode_begin+0x48/0xf0 [ 7.810000] ---[ end trace 6f6a7a8fae20d2f0 ]--- Signed-off-by: Daniel Mentz Cc: Signed-off-by: Vineet Gupta diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index c1d3645..4c6eed8 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -188,10 +188,10 @@ static inline int arch_irqs_disabled(void) .endm .macro IRQ_ENABLE scratch + TRACE_ASM_IRQ_ENABLE lr \scratch, [status32] or \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK) flag \scratch - TRACE_ASM_IRQ_ENABLE .endm #endif /* __ASSEMBLY__ */ -- cgit v0.10.2 From 45c3b08a117e2232fc8d7b9e849ead36386f4f96 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 13 Jun 2016 16:38:27 +0200 Subject: ARC: Elide redundant setup of DMA callbacks For resources shared by all cores such as SLC and IOC, only the master core needs to do any setups / enabling / disabling etc. Cc: Signed-off-by: Vineet Gupta diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 5a294b2..0b10efe 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -921,6 +921,15 @@ void arc_cache_init(void) printk(arc_cache_mumbojumbo(0, str, sizeof(str))); + /* + * Only master CPU needs to execute rest of function: + * - Assume SMP so all cores will have same cache config so + * any geomtry checks will be same for all + * - IOC setup / dma callbacks only need to be setup once + */ + if (cpu) + return; + if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) { struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; -- cgit v0.10.2 From fae82e59d2ff7d789878c9eb3a007ae7eb6641b2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 16 Jul 2016 18:42:36 +0100 Subject: drm/i915: Handle ENOSPC after failing to insert a mappable node Even after adding individual page support for GTT mmaping, we can still fail to find any space within the mappable region, and drm_mm_insert_node() will then report ENOSPC. We have to then handle this error by using the shmem access to the pages. Fixes: b50a53715f09 ("drm/i915: Support for pread/pwrite ... objects") Testcase: igt/gem_concurrent_blit Signed-off-by: Chris Wilson Cc: Ankitprasad Sharma Cc: Tvrtko Ursulin (cherry picked from commit d1054ee492a89b134fb0ac527b0714c277ae9c0f) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 27ef10e..aceaad0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1306,7 +1306,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, * textures). Fallback to the shmem path in that case. */ } - if (ret == -EFAULT) { + if (ret == -EFAULT || ret == -ENOSPC) { if (obj->phys_handle) ret = i915_gem_phys_pwrite(obj, args, file); else if (i915_gem_object_has_struct_page(obj)) -- cgit v0.10.2 From 5728e0de741a3581e9900c5cbee3a51425daf211 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Jul 2016 15:59:28 +0300 Subject: drm/i915: Fix iboost setting for DDI with 4 lanes on SKL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bspec says: "For DDIA with x4 capability (DDI_BUF_CTL DDIA Lane Capability Control = DDIA x4), the I_boost value has to be programmed in both tx_blnclegsctl_0 and tx_blnclegsctl_4." Currently we only program tx_blnclegsctl_0. Let's do the other one as well. Cc: stable@vger.kernel.org Fixes: f8896f5d58e6 ("drm/i915/skl: Buffer translation improvements") Cc: David Weinehall Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1468328376-6380-2-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: David Weinehall (cherry picked from commit a7d8dbc07c8f0faaace983b1e4c6e9495dd0aa75) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ce14fe0..5c06413 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1536,6 +1536,7 @@ enum skl_disp_power_wells { #define BALANCE_LEG_MASK(port) (7<<(8+3*(port))) /* Balance leg disable bits */ #define BALANCE_LEG_DISABLE_SHIFT 23 +#define BALANCE_LEG_DISABLE(port) (1 << (23 + (port))) /* * Fence registers diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index dd1d6fe..75354cd 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1379,14 +1379,30 @@ void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc) TRANS_CLK_SEL_DISABLED); } -static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type) +static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv, + enum port port, uint8_t iboost) { + u32 tmp; + + tmp = I915_READ(DISPIO_CR_TX_BMU_CR0); + tmp &= ~(BALANCE_LEG_MASK(port) | BALANCE_LEG_DISABLE(port)); + if (iboost) + tmp |= iboost << BALANCE_LEG_SHIFT(port); + else + tmp |= BALANCE_LEG_DISABLE(port); + I915_WRITE(DISPIO_CR_TX_BMU_CR0, tmp); +} + +static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level) +{ + struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + enum port port = intel_dig_port->port; + int type = encoder->type; const struct ddi_buf_trans *ddi_translations; uint8_t iboost; uint8_t dp_iboost, hdmi_iboost; int n_entries; - u32 reg; /* VBT may override standard boost values */ dp_iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level; @@ -1428,16 +1444,10 @@ static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv, return; } - reg = I915_READ(DISPIO_CR_TX_BMU_CR0); - reg &= ~BALANCE_LEG_MASK(port); - reg &= ~(1 << (BALANCE_LEG_DISABLE_SHIFT + port)); - - if (iboost) - reg |= iboost << BALANCE_LEG_SHIFT(port); - else - reg |= 1 << (BALANCE_LEG_DISABLE_SHIFT + port); + _skl_ddi_set_iboost(dev_priv, port, iboost); - I915_WRITE(DISPIO_CR_TX_BMU_CR0, reg); + if (port == PORT_A && intel_dig_port->max_lanes == 4) + _skl_ddi_set_iboost(dev_priv, PORT_E, iboost); } static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, @@ -1568,7 +1578,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) level = translate_signal_level(signal_levels); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) - skl_ddi_set_iboost(dev_priv, level, port, encoder->type); + skl_ddi_set_iboost(encoder, level); else if (IS_BROXTON(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); -- cgit v0.10.2 From 7ff9a55614712adf13ce7990565be0263e620f5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Jul 2016 15:59:30 +0300 Subject: drm/i915: Program iboost settings for HDMI/DVI on SKL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we fail to program the iboost stuff for HDMI/DVI. Let's remedy that. Cc: stable@vger.kernel.org Fixes: f8896f5d58e6 ("drm/i915/skl: Buffer translation improvements") Cc: David Weinehall Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1468328376-6380-4-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: David Weinehall (cherry picked from commit 8d8bb85eb7d859aa9bbe36e588690a1d22af7608) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 75354cd..415c061 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -388,6 +388,40 @@ skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) } } +static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) +{ + int n_hdmi_entries; + int hdmi_level; + int hdmi_default_entry; + + hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; + + if (IS_BROXTON(dev_priv)) + return hdmi_level; + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); + hdmi_default_entry = 8; + } else if (IS_BROADWELL(dev_priv)) { + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + hdmi_default_entry = 7; + } else if (IS_HASWELL(dev_priv)) { + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + hdmi_default_entry = 6; + } else { + WARN(1, "ddi translation table missing\n"); + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + hdmi_default_entry = 7; + } + + /* Choose a good default if VBT is badly populated */ + if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || + hdmi_level >= n_hdmi_entries) + hdmi_level = hdmi_default_entry; + + return hdmi_level; +} + /* * Starting with Haswell, DDI port buffers must be programmed with correct * values in advance. The buffer values are different for FDI and DP modes, @@ -399,7 +433,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; - int i, n_hdmi_entries, n_dp_entries, n_edp_entries, hdmi_default_entry, + int i, n_hdmi_entries, n_dp_entries, n_edp_entries, size; int hdmi_level; enum port port; @@ -410,7 +444,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) const struct ddi_buf_trans *ddi_translations; port = intel_ddi_get_encoder_port(encoder); - hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; + hdmi_level = intel_ddi_hdmi_level(dev_priv, port); if (IS_BROXTON(dev_priv)) { if (encoder->type != INTEL_OUTPUT_HDMI) @@ -430,7 +464,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) skl_get_buf_trans_edp(dev_priv, &n_edp_entries); ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); - hdmi_default_entry = 8; /* If we're boosting the current, set bit 31 of trans1 */ if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level || dev_priv->vbt.ddi_port_info[port].dp_boost_level) @@ -456,7 +489,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; } else if (IS_HASWELL(dev_priv)) { ddi_translations_fdi = hsw_ddi_translations_fdi; ddi_translations_dp = hsw_ddi_translations_dp; @@ -464,7 +496,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) ddi_translations_hdmi = hsw_ddi_translations_hdmi; n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp); n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - hdmi_default_entry = 6; } else { WARN(1, "ddi translation table missing\n"); ddi_translations_edp = bdw_ddi_translations_dp; @@ -474,7 +505,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; } switch (encoder->type) { @@ -505,11 +535,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) if (encoder->type != INTEL_OUTPUT_HDMI) return; - /* Choose a good default if VBT is badly populated */ - if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || - hdmi_level >= n_hdmi_entries) - hdmi_level = hdmi_default_entry; - /* Entry 9 is for HDMI: */ I915_WRITE(DDI_BUF_TRANS_LO(port, i), ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit); @@ -1647,6 +1672,10 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) intel_dp_stop_link_train(intel_dp); } else if (type == INTEL_OUTPUT_HDMI) { struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); + int level = intel_ddi_hdmi_level(dev_priv, port); + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + skl_ddi_set_iboost(intel_encoder, level); intel_hdmi->set_infoframes(encoder, crtc->config->has_hdmi_sink, -- cgit v0.10.2 From d8b6161f7294b63180da443c9ff48c7efce182eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 2 Aug 2016 14:07:33 +0300 Subject: drm/i915: Clean up the extra RPM ref on CHV with i915.enable_rc6=0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the CHV early bail out from intel_cleanup_gt_powersave() so that we'll clean up the extra RPM reference held due to i915.enable_rc6=0. Cc: Imre Deak Fixes: b268c699aca5 ("drm/i915: refactor RPM disabling due to RC6 being disabled") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1470136053-23276-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit 8dac1e1f2068321fb4b7062d3c5408971f7a7e35) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f4f3fcc..1ff5c63 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6573,9 +6573,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) { - if (IS_CHERRYVIEW(dev_priv)) - return; - else if (IS_VALLEYVIEW(dev_priv)) + if (IS_VALLEYVIEW(dev_priv)) valleyview_cleanup_gt_powersave(dev_priv); if (!i915.enable_rc6) -- cgit v0.10.2 From 0a491b96aa59a7232f6c1a81414aa57fb8de8594 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 08:43:53 +0100 Subject: drm/i915/fbc: FBC causes display flicker when VT-d is enabled on Skylake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Erratum SKL075: Display Flicker May Occur When Both VT-d And FBC Are Enabled "Display flickering may occur when both FBC (Frame Buffer Compression) and VT - d (Intel® Virtualization Technology for Directed I/O) are enabled and in use by the display controller." Ville found the w/a name in the database: WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt and also dug out that it affects Broxton. v2: Log when the quirk is applied. v3: Ensure i915.enable_fbc is false when !HAS_FBC() v4: Fix function name after rebase v5: Add Broxton to the workaround Note for backporting to stable, we need to add #define mkwrite_device_info(ptr) \ ((struct intel_device_info *)INTEL_INFO(ptr)) Signed-off-by: Chris Wilson Cc: Paulo Zanoni Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/1470296633-20388-1-git-send-email-chris@chris-wilson.co.uk (cherry picked from commit 36dbc4d76918d7557b686f807106dcc799174b12) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 6a7ad3e..3836a1c 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -1230,12 +1230,29 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) if (i915.enable_fbc >= 0) return !!i915.enable_fbc; + if (!HAS_FBC(dev_priv)) + return 0; + if (IS_BROADWELL(dev_priv)) return 1; return 0; } +static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv) +{ +#ifdef CONFIG_INTEL_IOMMU + /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */ + if (intel_iommu_gfx_mapped && + (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) { + DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n"); + return true; + } +#endif + + return false; +} + /** * intel_fbc_init - Initialize FBC * @dev_priv: the i915 device @@ -1253,6 +1270,9 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) fbc->active = false; fbc->work.scheduled = false; + if (need_fbc_vtd_wa(dev_priv)) + mkwrite_device_info(dev_priv)->has_fbc = false; + i915.enable_fbc = intel_sanitize_fbc_option(dev_priv); DRM_DEBUG_KMS("Sanitized enable_fbc value: %d\n", i915.enable_fbc); -- cgit v0.10.2 From 2ca17b87e85ffada1e3da941d5c1c9ee4f56ca9f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 09:09:53 +0100 Subject: drm/i915: Add missing rpm wakelock to GGTT pread Joonas spotted a discrepancy between the pwrite and pread ioctls, in that pwrite takes the rpm wakelock around its GGTT access, The wakelock is required in order for the GTT to function. In disregard for the current convention, we take the rpm wakelock around the access itself rather than around the struct_mutex as the nesting is not strictly required and such ordering will one day be fixed by explicitly noting the barrier dependencies between the GGTT and rpm. Fixes: b50a53715f09 ("drm/i915: Support for pread/pwrite ...") Reported-by: Joonas Lahtinen Signed-off-by: Chris Wilson Cc: Ankitprasad Sharma Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: drm-intel-fixes@lists.freedesktop.org Link: http://patchwork.freedesktop.org/patch/msgid/1470298193-21765-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit 1dd5b6f2020389e75bb3d269c038497f065e68c9) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index aceaad0..a77ce99 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -879,9 +879,12 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, ret = i915_gem_shmem_pread(dev, obj, args, file); /* pread for non shmem backed objects */ - if (ret == -EFAULT || ret == -ENODEV) + if (ret == -EFAULT || ret == -ENODEV) { + intel_runtime_pm_get(to_i915(dev)); ret = i915_gem_gtt_pread(dev, obj, args->size, args->offset, args->data_ptr); + intel_runtime_pm_put(to_i915(dev)); + } out: drm_gem_object_unreference(&obj->base); -- cgit v0.10.2 From 3cffb0a44750726cdc1cc07399efe3cbb45e028b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 Aug 2016 17:09:00 +0100 Subject: drm/i915: Acquire audio powerwell for HD-Audio registers On Haswell/Broadwell, the HD-Audio block is inside the HDMI/display power well and so the sna-hda audio codec acquires the display power well while it is operational. However, Skylake separates the powerwells again, but yet we still need the audio powerwell to setup the registers. (But then the hardware uses those registers even while powered off???) Acquiring the powerwell around setting the chicken bits when setting up the audio channel does at least silence the WARNs from touching our registers whilst unpowered. We silence our own test cases, but maybe there is a latent bug in using the audio channel? v2: Grab both rpm wakelock and audio wakelock Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96214 Fixes: 03b135cebc47 "ALSA: hda - remove dependency on i915 power well for SKL") Signed-off-by: Chris Wilson Cc: Libin Yang Cc: Takashi Iwai Cc: Marius Vlad Tested-by: Hans de Goede Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/1470240540-29004-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter (cherry picked from commit d838a110f0b310d408ebe6b5a97e36ec27555ebf) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 6700a7b..d32f586 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -600,6 +600,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev, if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) return; + i915_audio_component_get_power(dev); + /* * Enable/disable generating the codec wake signal, overriding the * internal logic to generate the codec wake to controller. @@ -615,6 +617,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev, I915_WRITE(HSW_AUD_CHICKENBIT, tmp); usleep_range(1000, 1500); } + + i915_audio_component_put_power(dev); } /* Get CDCLK in kHz */ @@ -648,6 +652,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev, !IS_HASWELL(dev_priv)) return 0; + i915_audio_component_get_power(dev); mutex_lock(&dev_priv->av_mutex); /* 1. get the pipe */ intel_encoder = dev_priv->dig_port_map[port]; @@ -698,6 +703,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev, unlock: mutex_unlock(&dev_priv->av_mutex); + i915_audio_component_put_power(dev); return err; } -- cgit v0.10.2 From 58e311b09c319183254d9220c50a533e7157c9ab Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 4 Aug 2016 14:08:00 -0700 Subject: drm/i915/gen9: Give one extra block per line for SKL plane WM calculations The bspec was updated a couple weeks ago to add an extra block per line to plane watermark calculations for linear pixel formats. Bspec update 115327 description: "Gen9+ - Updated the plane blocks per line calculation for linear cases. Adds +1 for all linear cases to handle the non-block aligned stride cases." Cc: Lyude Cc: drm-intel-fixes@lists.freedesktop.org Signed-off-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/1470344880-27394-1-git-send-email-matthew.d.roper@intel.com Reviewed-by: Lyude (cherry picked from commit 055c3ff69d440928964228455ec29b071258d5fa) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 1ff5c63..3c7b382 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3344,6 +3344,8 @@ static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, plane_bytes_per_line *= 4; plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); plane_blocks_per_line /= 4; + } else if (tiling == DRM_FORMAT_MOD_NONE) { + plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1; } else { plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); } -- cgit v0.10.2 From 85bf59d188721dca37bc8276457e68351213f38f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 2 Aug 2016 15:21:57 +0300 Subject: drm/i915: Fix iboost setting for SKL Y/U DP DDI buffer translation entry 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The spec was recently fixed to have the correct iboost setting for the SKL Y/U DP DDI buffer translation table entry 2. Update our tables to match. Cc: David Weinehall Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1470140517-13011-1-git-send-email-ville.syrjala@linux.intel.com Cc: stable@vger.kernel.org Reviewed-by: David Weinehall (cherry picked from commit 5ac9056753e79ac5ad1ccc3c99b311688e46e8c9) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 415c061..1a7efac 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -145,7 +145,7 @@ static const struct ddi_buf_trans skl_ddi_translations_dp[] = { static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = { { 0x0000201B, 0x000000A2, 0x0 }, { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x0 }, + { 0x80007011, 0x000000CD, 0x1 }, { 0x80009010, 0x000000C0, 0x1 }, { 0x0000201B, 0x0000009D, 0x0 }, { 0x80005012, 0x000000C0, 0x1 }, @@ -158,7 +158,7 @@ static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = { static const struct ddi_buf_trans skl_y_ddi_translations_dp[] = { { 0x00000018, 0x000000A2, 0x0 }, { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x0 }, + { 0x80007011, 0x000000CD, 0x3 }, { 0x80009010, 0x000000C0, 0x3 }, { 0x00000018, 0x0000009D, 0x0 }, { 0x80005012, 0x000000C0, 0x3 }, -- cgit v0.10.2 From bf74c93cd7696793eceadaf02b176500523e0b93 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 2 Aug 2016 09:36:53 +0100 Subject: drm/i915: fix WaInsertDummyPushConstPs As pointed out by Chris Harris, we are using the wrong WA name, it should in fact be WaToEnableHwFixForPushConstHWBug, also it should be applied from C0 onwards for both BXT and KBL. Fixes: 7b9005cd45f3 ("drm/i915: Add WaInsertDummyPushConstP for bxt and kbl") Cc: Chris Harris Cc: Mika Kuoppala Reported-by: Chris Harris Signed-off-by: Matthew Auld Reviewed-by: Arun Siluvery Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470127013-29653-1-git-send-email-matthew.auld@intel.com (cherry picked from commit 575e3ccbce4582395d57612b289178bad4af3be8) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cca7792..1d3161b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1178,8 +1178,8 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2)); - /* WaInsertDummyPushConstPs:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) + /* WaToEnableHwFixForPushConstHWBug:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); @@ -1222,8 +1222,8 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_RO_PERF_DIS); - /* WaInsertDummyPushConstPs:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + /* WaToEnableHwFixForPushConstHWBug:kbl */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); -- cgit v0.10.2 From 3871f42a57efcdc6a9da751a8cb6fa196c212289 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 5 Aug 2016 19:04:40 +0100 Subject: drm/i915: fix aliasing_ppgtt leak In i915_ggtt_cleanup_hw we need to remember to free aliasing_ppgtt. This fixes the following kmemleak message: unreferenced object 0xffff880213cca000 (size 8192): comm "modprobe", pid 1298, jiffies 4294745402 (age 703.930s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x4e/0xb0 [] kmem_cache_alloc_trace+0x142/0x1d0 [] i915_gem_init_ggtt+0x10f/0x210 [i915] [] i915_gem_init+0x5b/0xd0 [i915] [] i915_driver_load+0x97a/0x1460 [i915] [] i915_pci_probe+0x4f/0x70 [i915] [] local_pci_probe+0x45/0xa0 [] pci_device_probe+0x103/0x150 [] driver_probe_device+0x22c/0x440 [] __driver_attach+0xd1/0xf0 [] bus_for_each_dev+0x6c/0xc0 [] driver_attach+0x1e/0x20 [] bus_add_driver+0x1c3/0x280 [] driver_register+0x60/0xe0 [] __pci_register_driver+0x4c/0x50 [] 0xffffffffa013605b Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Fixes: b18b6bde300e ("drm/i915/bdw: Free PPGTT struct") Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470420280-21417-1-git-send-email-matthew.auld@intel.com (cherry picked from commit cb7f27601c81a1e0454e9461e96f65b31fafbea0) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 10f1e32..7a30af7 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2873,6 +2873,7 @@ void i915_ggtt_cleanup_hw(struct drm_device *dev) struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; ppgtt->base.cleanup(&ppgtt->base); + kfree(ppgtt); } i915_gem_cleanup_stolen(dev); -- cgit v0.10.2 From dfa2997055659b4e706a85fba481050cc7e7ad82 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 5 Aug 2016 23:28:27 +0300 Subject: drm/i915: Fix modeset handling during gpu reset, v5. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function would call drm_modeset_lock_all, while the suspend/resume functions already have their own locking. Fix this by factoring out __intel_display_resume, and calling the atomic helpers for duplicating atomic state and disabling all crtc's during suspend. Changes since v1: - Deal with -EDEADLK right after lock_all and clean up calls to hw readout. - Always take all modeset locks so updates during gpu reset are blocked. Changes since v2: - Fix deadlock in intel_update_primary_planes. - Move WARN_ON(EDEADLK) to __intel_display_resume. - pctx -> ctx - only call __intel_display_resume on success in intel_display_resume. Changes since v3: - Rebase on top of dev_priv -> dev change. - Use drm_modeset_lock_all_ctx instead of drm_modeset_lock_all. Changes since v4 [by vsyrjala]: - Deal with skip_intermediate_wm - Update comment w.r.t. mode_config.mutex vs. ->detect() - Rebase due to INTEL_GEN() etc. Signed-off-by: Maarten Lankhorst Fixes: e2c8b8701e2d ("drm/i915: Use atomic helpers for suspend, v2.") Cc: stable@vger.kernel.org Tested-by: Ville Syrjälä Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1470428910-12125-2-git-send-email-ville.syrjala@linux.intel.com (cherry picked from commit 739748939974791b84629a8790527a16f76873a4) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 21f9390..20fe9d5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1854,6 +1854,7 @@ struct drm_i915_private { enum modeset_restore modeset_restore; struct mutex modeset_restore_lock; struct drm_atomic_state *modeset_restore_state; + struct drm_modeset_acquire_ctx reset_ctx; struct list_head vm_list; /* Global list of all address spaces */ struct i915_ggtt ggtt; /* VM representing the global address space */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c457eed..fbb8574 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3093,40 +3093,110 @@ static void intel_update_primary_planes(struct drm_device *dev) for_each_crtc(dev, crtc) { struct intel_plane *plane = to_intel_plane(crtc->primary); - struct intel_plane_state *plane_state; - - drm_modeset_lock_crtc(crtc, &plane->base); - plane_state = to_intel_plane_state(plane->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); if (plane_state->visible) plane->update_plane(&plane->base, to_intel_crtc_state(crtc->state), plane_state); + } +} + +static int +__intel_display_resume(struct drm_device *dev, + struct drm_atomic_state *state) +{ + struct drm_crtc_state *crtc_state; + struct drm_crtc *crtc; + int i, ret; + + intel_modeset_setup_hw_state(dev); + i915_redisable_vga(dev); - drm_modeset_unlock_crtc(crtc); + if (!state) + return 0; + + for_each_crtc_in_state(state, crtc, crtc_state, i) { + /* + * Force recalculation even if we restore + * current state. With fast modeset this may not result + * in a modeset when the state is compatible. + */ + crtc_state->mode_changed = true; } + + /* ignore any reset values/BIOS leftovers in the WM registers */ + to_intel_atomic_state(state)->skip_intermediate_wm = true; + + ret = drm_atomic_commit(state); + + WARN_ON(ret == -EDEADLK); + return ret; } void intel_prepare_reset(struct drm_i915_private *dev_priv) { + struct drm_device *dev = &dev_priv->drm; + struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx; + struct drm_atomic_state *state; + int ret; + /* no reset support for gen2 */ if (IS_GEN2(dev_priv)) return; - /* reset doesn't touch the display */ + /* + * Need mode_config.mutex so that we don't + * trample ongoing ->detect() and whatnot. + */ + mutex_lock(&dev->mode_config.mutex); + drm_modeset_acquire_init(ctx, 0); + while (1) { + ret = drm_modeset_lock_all_ctx(dev, ctx); + if (ret != -EDEADLK) + break; + + drm_modeset_backoff(ctx); + } + + /* reset doesn't touch the display, but flips might get nuked anyway, */ if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) return; - drm_modeset_lock_all(&dev_priv->drm); /* * Disabling the crtcs gracefully seems nicer. Also the * g33 docs say we should at least disable all the planes. */ - intel_display_suspend(&dev_priv->drm); + state = drm_atomic_helper_duplicate_state(dev, ctx); + if (IS_ERR(state)) { + ret = PTR_ERR(state); + state = NULL; + DRM_ERROR("Duplicating state failed with %i\n", ret); + goto err; + } + + ret = drm_atomic_helper_disable_all(dev, ctx); + if (ret) { + DRM_ERROR("Suspending crtc's failed with %i\n", ret); + goto err; + } + + dev_priv->modeset_restore_state = state; + state->acquire_ctx = ctx; + return; + +err: + drm_atomic_state_free(state); } void intel_finish_reset(struct drm_i915_private *dev_priv) { + struct drm_device *dev = &dev_priv->drm; + struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx; + struct drm_atomic_state *state = dev_priv->modeset_restore_state; + int ret; + /* * Flips in the rings will be nuked by the reset, * so complete all pending flips so that user space @@ -3138,6 +3208,8 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) if (IS_GEN2(dev_priv)) return; + dev_priv->modeset_restore_state = NULL; + /* reset doesn't touch the display */ if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { /* @@ -3149,29 +3221,32 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) * FIXME: Atomic will make this obsolete since we won't schedule * CS-based flips (which might get lost in gpu resets) any more. */ - intel_update_primary_planes(&dev_priv->drm); - return; - } - - /* - * The display has been reset as well, - * so need a full re-initialization. - */ - intel_runtime_pm_disable_interrupts(dev_priv); - intel_runtime_pm_enable_interrupts(dev_priv); + intel_update_primary_planes(dev); + } else { + /* + * The display has been reset as well, + * so need a full re-initialization. + */ + intel_runtime_pm_disable_interrupts(dev_priv); + intel_runtime_pm_enable_interrupts(dev_priv); - intel_modeset_init_hw(&dev_priv->drm); + intel_modeset_init_hw(dev); - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display.hpd_irq_setup) - dev_priv->display.hpd_irq_setup(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->display.hpd_irq_setup) + dev_priv->display.hpd_irq_setup(dev_priv); + spin_unlock_irq(&dev_priv->irq_lock); - intel_display_resume(&dev_priv->drm); + ret = __intel_display_resume(dev, state); + if (ret) + DRM_ERROR("Restoring old state failed with %i\n", ret); - intel_hpd_init(dev_priv); + intel_hpd_init(dev_priv); + } - drm_modeset_unlock_all(&dev_priv->drm); + drm_modeset_drop_locks(ctx); + drm_modeset_acquire_fini(ctx); + mutex_unlock(&dev->mode_config.mutex); } static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) @@ -16174,9 +16249,10 @@ void intel_display_resume(struct drm_device *dev) struct drm_atomic_state *state = dev_priv->modeset_restore_state; struct drm_modeset_acquire_ctx ctx; int ret; - bool setup = false; dev_priv->modeset_restore_state = NULL; + if (state) + state->acquire_ctx = &ctx; /* * This is a cludge because with real atomic modeset mode_config.mutex @@ -16187,43 +16263,17 @@ void intel_display_resume(struct drm_device *dev) mutex_lock(&dev->mode_config.mutex); drm_modeset_acquire_init(&ctx, 0); -retry: - ret = drm_modeset_lock_all_ctx(dev, &ctx); - - if (ret == 0 && !setup) { - setup = true; - - intel_modeset_setup_hw_state(dev); - i915_redisable_vga(dev); - } - - if (ret == 0 && state) { - struct drm_crtc_state *crtc_state; - struct drm_crtc *crtc; - int i; - - state->acquire_ctx = &ctx; - - /* ignore any reset values/BIOS leftovers in the WM registers */ - to_intel_atomic_state(state)->skip_intermediate_wm = true; - - for_each_crtc_in_state(state, crtc, crtc_state, i) { - /* - * Force recalculation even if we restore - * current state. With fast modeset this may not result - * in a modeset when the state is compatible. - */ - crtc_state->mode_changed = true; - } - - ret = drm_atomic_commit(state); - } + while (1) { + ret = drm_modeset_lock_all_ctx(dev, &ctx); + if (ret != -EDEADLK) + break; - if (ret == -EDEADLK) { drm_modeset_backoff(&ctx); - goto retry; } + if (!ret) + ret = __intel_display_resume(dev, state); + drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); mutex_unlock(&dev->mode_config.mutex); -- cgit v0.10.2 From ed0ab110235c659fdb3f73d27907b1b45b89cf30 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 14 Jul 2016 10:42:35 +0800 Subject: clk: sunxi-ng: Fix inverted test condition in ccu_helper_wait_for_lock The condition passed to read*_poll_timeout() is the break condition, i.e. wait for this condition to happen and return success. The original code assumed the opposite, resulting in a warning when the PLL clock rate was changed but never lost it's lock as far as the readout indicated. This was verified by checking the read out register value. Fixes: 1d80c14248d6 ("clk: sunxi-ng: Add common infrastructure") Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Stephen Boyd diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c index fc17b52..51d4bac 100644 --- a/drivers/clk/sunxi-ng/ccu_common.c +++ b/drivers/clk/sunxi-ng/ccu_common.c @@ -31,7 +31,7 @@ void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock) return; WARN_ON(readl_relaxed_poll_timeout(common->base + common->reg, reg, - !(reg & lock), 100, 70000)); + reg & lock, 100, 70000)); } int sunxi_ccu_probe(struct device_node *node, void __iomem *reg, -- cgit v0.10.2 From 672ca65d9aa8578f382784fe73578cd499664828 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Wed, 10 Aug 2016 14:07:34 +0200 Subject: tipc: fix variable dereference before NULL check In commit cf6f7e1d5109 ("tipc: dump monitor attributes"), I dereferenced a pointer before checking if its valid. This is reported by static check Smatch as: net/tipc/monitor.c:733 tipc_nl_add_monitor_peer() warn: variable dereferenced before check 'mon' (see line 731) In this commit, we check for a valid monitor before proceeding with any other operation. Fixes: cf6f7e1d5109 ("tipc: dump monitor attributes") Reported-by: Dan Carpenter Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index b62caa1..ed97a58 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -728,12 +728,13 @@ int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, u32 bearer_id, u32 *prev_node) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); - struct tipc_peer *peer = mon->self; + struct tipc_peer *peer; if (!mon) return -EINVAL; read_lock_bh(&mon->lock); + peer = mon->self; do { if (*prev_node) { if (peer->addr == *prev_node) -- cgit v0.10.2 From dafa6b0db2d62164c5ef81a40312d5ba514126b9 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 10 Aug 2016 17:48:36 +0200 Subject: net: hns: fix typo in g_gmac_stats_string[] s/gamc/gmac/ Signed-off-by: Fabian Frederick Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index 1235c7f..1e1eb92 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -17,7 +17,7 @@ static const struct mac_stats_string g_gmac_stats_string[] = { {"gmac_rx_octets_total_ok", MAC_STATS_FIELD_OFF(rx_good_bytes)}, {"gmac_rx_octets_bad", MAC_STATS_FIELD_OFF(rx_bad_bytes)}, {"gmac_rx_uc_pkts", MAC_STATS_FIELD_OFF(rx_uc_pkts)}, - {"gamc_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)}, + {"gmac_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)}, {"gmac_rx_bc_pkts", MAC_STATS_FIELD_OFF(rx_bc_pkts)}, {"gmac_rx_pkts_64octets", MAC_STATS_FIELD_OFF(rx_64bytes)}, {"gmac_rx_pkts_65to127", MAC_STATS_FIELD_OFF(rx_65to127)}, -- cgit v0.10.2 From e7f851684efb3377e9c93aca7fae6e76212e5680 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 5 Aug 2016 23:37:34 -0700 Subject: megaraid_sas: Fix probing cards without io port Found one megaraid_sas HBA probe fails, [ 187.235190] scsi host2: Avago SAS based MegaRAID driver [ 191.112365] megaraid_sas 0000:89:00.0: BAR 0: can't reserve [io 0x0000-0x00ff] [ 191.120548] megaraid_sas 0000:89:00.0: IO memory region busy! and the card has resource like, [ 125.097714] pci 0000:89:00.0: [1000:005d] type 00 class 0x010400 [ 125.104446] pci 0000:89:00.0: reg 0x10: [io 0x0000-0x00ff] [ 125.110686] pci 0000:89:00.0: reg 0x14: [mem 0xce400000-0xce40ffff 64bit] [ 125.118286] pci 0000:89:00.0: reg 0x1c: [mem 0xce300000-0xce3fffff 64bit] [ 125.125891] pci 0000:89:00.0: reg 0x30: [mem 0xce200000-0xce2fffff pref] that does not io port resource allocated from BIOS, and kernel can not assign one as io port shortage. The driver is only looking for MEM, and should not fail. It turns out megasas_init_fw() etc are using bar index as mask. index 1 is used as mask 1, so that pci_request_selected_regions() is trying to request BAR0 instead of BAR1. Fix all related reference. Fixes: b6d5d8808b4c ("megaraid_sas: Use lowest memory bar for SR-IOV VF support") Signed-off-by: Yinghai Lu Acked-by: Kashyap Desai Signed-off-by: Martin K. Petersen diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 2dab3dc..c1ed25a 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5037,7 +5037,7 @@ static int megasas_init_fw(struct megasas_instance *instance) /* Find first memory bar */ bar_list = pci_select_bars(instance->pdev, IORESOURCE_MEM); instance->bar = find_first_bit(&bar_list, sizeof(unsigned long)); - if (pci_request_selected_regions(instance->pdev, instance->bar, + if (pci_request_selected_regions(instance->pdev, 1<bar, "megasas: LSI")) { dev_printk(KERN_DEBUG, &instance->pdev->dev, "IO memory region busy!\n"); return -EBUSY; @@ -5339,7 +5339,7 @@ fail_ready_state: iounmap(instance->reg_set); fail_ioremap: - pci_release_selected_regions(instance->pdev, instance->bar); + pci_release_selected_regions(instance->pdev, 1<bar); return -EINVAL; } @@ -5360,7 +5360,7 @@ static void megasas_release_mfi(struct megasas_instance *instance) iounmap(instance->reg_set); - pci_release_selected_regions(instance->pdev, instance->bar); + pci_release_selected_regions(instance->pdev, 1<bar); } /** diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index ec83754..52d8bbf 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2603,7 +2603,7 @@ megasas_release_fusion(struct megasas_instance *instance) iounmap(instance->reg_set); - pci_release_selected_regions(instance->pdev, instance->bar); + pci_release_selected_regions(instance->pdev, 1<bar); } /** -- cgit v0.10.2 From 0d7826ddded60dbe36c451b462c24387e2727275 Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 10 Aug 2016 22:45:57 -0400 Subject: ipr: Fix sync scsi scan Commit b195d5e2bffd ("ipr: Wait to do async scan until scsi host is initialized") fixed async scan for ipr, but broke sync scan. This fixes sync scan back up. Signed-off-by: Brian King Tested-by: Michael Ellerman Signed-off-by: Martin K. Petersen diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index bf85974..17d04c7 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -10410,8 +10410,11 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) __ipr_remove(pdev); return rc; } + spin_lock_irqsave(ioa_cfg->host->host_lock, flags); + ioa_cfg->scan_enabled = 1; + schedule_work(&ioa_cfg->work_q); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); - scsi_scan_host(ioa_cfg->host); ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight; if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { @@ -10421,10 +10424,8 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) } } - spin_lock_irqsave(ioa_cfg->host->host_lock, flags); - ioa_cfg->scan_enabled = 1; - schedule_work(&ioa_cfg->work_q); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); + scsi_scan_host(ioa_cfg->host); + return 0; } -- cgit v0.10.2 From 4b5b9ba553f9aa5f484ab972fc9b58061885ceca Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Tue, 9 Aug 2016 16:24:50 +0100 Subject: openvswitch: do not ignore netdev errors when creating tunnel vports The creation of a tunnel vport (geneve, gre, vxlan) brings up a corresponding netdev, a multi-step operation which can fail. For example, changing a vxlan vport's netdev state to 'up' binds the vport's socket to a UDP port - if the binding fails (e.g. due to the port being in use), the error is currently ignored giving the appearance that the tunnel vport creation completed successfully. Signed-off-by: Martynas Pumputis Acked-by: Pravin B Shelar Signed-off-by: David S. Miller diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 1a1fcec..5aaf3ba 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -93,7 +93,14 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) return ERR_CAST(dev); } - dev_change_flags(dev, dev->flags | IFF_UP); + err = dev_change_flags(dev, dev->flags | IFF_UP); + if (err < 0) { + rtnl_delete_link(dev); + rtnl_unlock(); + ovs_vport_free(vport); + goto error; + } + rtnl_unlock(); return vport; error: diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 7f8897f..0e72d95 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -54,6 +54,7 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms) struct net *net = ovs_dp_get_net(parms->dp); struct net_device *dev; struct vport *vport; + int err; vport = ovs_vport_alloc(0, &ovs_gre_vport_ops, parms); if (IS_ERR(vport)) @@ -67,9 +68,15 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms) return ERR_CAST(dev); } - dev_change_flags(dev, dev->flags | IFF_UP); - rtnl_unlock(); + err = dev_change_flags(dev, dev->flags | IFF_UP); + if (err < 0) { + rtnl_delete_link(dev); + rtnl_unlock(); + ovs_vport_free(vport); + return ERR_PTR(err); + } + rtnl_unlock(); return vport; } diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 5eb7694..7eb955e 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -130,7 +130,14 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) return ERR_CAST(dev); } - dev_change_flags(dev, dev->flags | IFF_UP); + err = dev_change_flags(dev, dev->flags | IFF_UP); + if (err < 0) { + rtnl_delete_link(dev); + rtnl_unlock(); + ovs_vport_free(vport); + goto error; + } + rtnl_unlock(); return vport; error: -- cgit v0.10.2 From 851da9ac53a5e7121f7c38a052f74b574dd73ec3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Aug 2016 23:54:09 +0200 Subject: drm/mediatek: add COMMON_CLK dependency On kernel builds without COMMON_CLK, the newly added mediatek drm driver fails to build: drivers/gpu/drm/mediatek/mtk_mipi_tx.c:130:16: error: field 'pll_hw' has incomplete type struct clk_hw pll_hw; ^~~~~~ In file included from ../include/linux/clk.h:16:0, from ../drivers/gpu/drm/mediatek/mtk_mipi_tx.c:14: drivers/gpu/drm/mediatek/mtk_mipi_tx.c: In function 'mtk_mipi_tx_from_clk_hw': include/linux/kernel.h:831:48: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types] const typeof( ((type *)0)->member ) *__mptr = (ptr); \ ^ /drivers/gpu/drm/mediatek/mtk_mipi_tx.c:136:9: note: in expansion of macro 'container_of' return container_of(hw, struct mtk_mipi_tx, pll_hw); ^~~~~~~~~~~~ drivers/gpu/drm/mediatek/mtk_mipi_tx.c: At top level: drivers/gpu/drm/mediatek/mtk_mipi_tx.c:302:21: error: variable 'mtk_mipi_tx_pll_ops' has initializer but incomplete type static const struct clk_ops mtk_mipi_tx_pll_ops = { This adds the required Kconfig dependency. Signed-off-by: Arnd Bergmann Acked-by: Philipp Zabel Link: https://patchwork.kernel.org/patch/9069061/ Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index 23ac804..5c21637 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -2,6 +2,7 @@ config DRM_MEDIATEK tristate "DRM Support for Mediatek SoCs" depends on DRM depends on ARCH_MEDIATEK || (ARM && COMPILE_TEST) + depends on COMMON_CLK select DRM_GEM_CMA_HELPER select DRM_KMS_HELPER select DRM_MIPI_DSI -- cgit v0.10.2 From b5db361eb7746cc9ada51554a6b996170c439f0b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Aug 2016 23:54:10 +0200 Subject: drm/mediatek: add CONFIG_OF dependency The mediatek DRM driver can be configured for compile testing with CONFIG_OF disabled, but then fails to link: drivers/gpu/built-in.o: In function `mtk_drm_bind': analogix_dp_reg.c:(.text+0x52888): undefined reference to `of_find_device_by_node' analogix_dp_reg.c:(.text+0x52930): undefined reference to `of_find_device_by_node' This adds an explicit Kconfig dependency. Signed-off-by: Arnd Bergmann Link: https://patchwork.kernel.org/patch/9120871/ Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index 5c21637..96ebf8b 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -3,6 +3,7 @@ config DRM_MEDIATEK depends on DRM depends on ARCH_MEDIATEK || (ARM && COMPILE_TEST) depends on COMMON_CLK + depends on OF select DRM_GEM_CMA_HELPER select DRM_KMS_HELPER select DRM_MIPI_DSI -- cgit v0.10.2 From 306d674a2ddd565adde7d9d6703157105c8444a6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Aug 2016 23:54:11 +0200 Subject: drm/mediatek: add ARM_SMCCC dependency ARM SMCCC is only set for ARMv7 and ARMv8 CPUs, but we currently allow the driver to be build for older architecture levels as well, which results in a link failure: drivers/gpu/built-in.o: In function `mtk_hdmi_hw_make_reg_writable': :(.text+0x1e737c): undefined reference to `arm_smccc_smc' This adds a Kconfig dependency. The patch applies on my two previous fixes that are not yet applied, so please apply all three to get randconfig builds to work correctly. Signed-off-by: Arnd Bergmann Fixes: 8f83f26891e1 ("drm/mediatek: Add HDMI support") Reviewed-by: Matthias Brugger Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index 96ebf8b..294de45 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -3,6 +3,7 @@ config DRM_MEDIATEK depends on DRM depends on ARCH_MEDIATEK || (ARM && COMPILE_TEST) depends on COMMON_CLK + depends on HAVE_ARM_SMCCC depends on OF select DRM_GEM_CMA_HELPER select DRM_KMS_HELPER -- cgit v0.10.2 From af7752106e4f12b4ee47b4eca3e7ba4bcec6e7e5 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 9 Aug 2016 15:58:48 +0200 Subject: s390/dasd: fix failing CUIR assignment under LPAR On LPAR the read message buffer command should be executed on the path it was received on otherwise there is a chance that the CUIR assignment might be faulty and the wrong channel path is set online/offline. Fix by setting the path mask accordingly. On z/VM we might not be able to do I/O on this path but there it does not matter on which path the read message buffer command is executed. Therefor implement a retry with an open path mask. Signed-off-by: Stefan Haberland diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index fd2eff4..98bbec4 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -5078,6 +5078,8 @@ static int dasd_eckd_read_message_buffer(struct dasd_device *device, return PTR_ERR(cqr); } + cqr->lpm = lpum; +retry: cqr->startdev = device; cqr->memdev = device; cqr->block = NULL; @@ -5122,6 +5124,14 @@ static int dasd_eckd_read_message_buffer(struct dasd_device *device, (prssdp + 1); memcpy(messages, message_buf, sizeof(struct dasd_rssd_messages)); + } else if (cqr->lpm) { + /* + * on z/VM we might not be able to do I/O on the requested path + * but instead we get the required information on any path + * so retry with open path mask + */ + cqr->lpm = 0; + goto retry; } else DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "Reading messages failed with rc=%d\n" -- cgit v0.10.2 From 772ce81264b179c0e61340998e3b29e900b2fa6d Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 8 Aug 2016 21:50:51 +0900 Subject: usb: renesas_usbhs: Fix receiving data corrupt on R-Car Gen3 with dmac Since R-Car Gen3 SoC has the USB-DMAC, this driver should set dparam->has_usb_dmac to 1. Otherwise, behavior of this driver and the usb-dmac driver will be mismatch, then sometimes receiving data will be corrupt. Fixes: de18757e272d ("usb: renesas_usbhs: add R-Car Gen3 power control") Cc: # v4.5+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 8fbbc2d..ac67bab 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -514,7 +514,8 @@ static struct renesas_usbhs_platform_info *usbhs_parse_dt(struct device *dev) if (gpio > 0) dparam->enable_gpio = gpio; - if (dparam->type == USBHS_TYPE_RCAR_GEN2) + if (dparam->type == USBHS_TYPE_RCAR_GEN2 || + dparam->type == USBHS_TYPE_RCAR_GEN3) dparam->has_usb_dmac = 1; return info; -- cgit v0.10.2 From 9ab967e6db7412b675ecbff80d5371d53c82cb2e Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 8 Aug 2016 21:50:52 +0900 Subject: usb: renesas_usbhs: clear the BRDYSTS in usbhsg_ep_enable() This patch fixes an issue that unexpected BRDY interruption happens when the usb_ep_{enable,disable}() are called with different direction. In this case, the driver will cause the following message: renesas_usbhs e6590000.usb: irq_ready run_error 1 : -16 This issue causes the followings: 1) A pipe is enabled as transmission 2) The pipe sent a data 3) The pipe is disabled and re-enabled as reception. 4) The pipe got a queue Since the driver doesn't clear the BRDYSTS flags after 2) above, the issue happens. If we add such clearing the flags into the driver, the code will become complicate. So, this patch clears the BRDYSTS flag of reception in usbhsg_ep_enable() to avoid complicate. Cc: # v4.1+ (usbhs_xxxsts_clear() is needed) Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 50f3363..92bc83b 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -617,10 +617,13 @@ static int usbhsg_ep_enable(struct usb_ep *ep, * use dmaengine if possible. * It will use pio handler if impossible. */ - if (usb_endpoint_dir_in(desc)) + if (usb_endpoint_dir_in(desc)) { pipe->handler = &usbhs_fifo_dma_push_handler; - else + } else { pipe->handler = &usbhs_fifo_dma_pop_handler; + usbhs_xxxsts_clear(priv, BRDYSTS, + usbhs_pipe_number(pipe)); + } ret = 0; } -- cgit v0.10.2 From 700aa7ff8d2c2b9cc669c99375e2ccd06d3cd38d Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 8 Aug 2016 21:50:53 +0900 Subject: usb: renesas_usbhs: Use dmac only if the pipe type is bulk This patch fixes an issue that isochronous transfer's data is possible to be lost as a workaround. Since this driver uses a workqueue to start the dmac, the transfer is possible to be delayed when system load is high. Fixes: 6e4b74e4690d ("usb: renesas: fix scheduling in atomic context bug") Cc: # v3.4+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 280ed5f..857e783 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -871,7 +871,7 @@ static int usbhsf_dma_prepare_push(struct usbhs_pkt *pkt, int *is_done) /* use PIO if packet is less than pio_dma_border or pipe is DCP */ if ((len < usbhs_get_dparam(priv, pio_dma_border)) || - usbhs_pipe_is_dcp(pipe)) + usbhs_pipe_type_is(pipe, USB_ENDPOINT_XFER_ISOC)) goto usbhsf_pio_prepare_push; /* check data length if this driver don't use USB-DMAC */ @@ -976,7 +976,7 @@ static int usbhsf_dma_prepare_pop_with_usb_dmac(struct usbhs_pkt *pkt, /* use PIO if packet is less than pio_dma_border or pipe is DCP */ if ((pkt->length < usbhs_get_dparam(priv, pio_dma_border)) || - usbhs_pipe_is_dcp(pipe)) + usbhs_pipe_type_is(pipe, USB_ENDPOINT_XFER_ISOC)) goto usbhsf_pio_prepare_pop; fifo = usbhsf_get_dma_fifo(priv, pkt); -- cgit v0.10.2 From c526c62d565ea5a5bba9433f28756079734f430d Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 1 Jul 2016 15:33:28 +0800 Subject: usb: gadget: composite: fix dereference after null check coverify warning cdev->config is checked for null pointer at above code, so cdev->config might be null, fix it by adding null pointer check. Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index eb64848..a8ecc7a 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1913,6 +1913,8 @@ unknown: break; case USB_RECIP_ENDPOINT: + if (!cdev->config) + break; endp = ((w_index & 0x80) >> 3) | (w_index & 0x0f); list_for_each_entry(f, &cdev->config->functions, list) { if (test_bit(endp, f->endpoints)) -- cgit v0.10.2 From 88c09eacf560c3303d0ee8cf91b8b7ff7f000350 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 1 Jul 2016 15:33:29 +0800 Subject: usb: gadget: u_ether: fix dereference after null check coverify warning dev->port_usb is checked for null pointer at above code, so dev->port_usb might be null, fix it by adding null pointer check. Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index a3f7e7c..5f562c1 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -556,7 +556,8 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, /* Multi frame CDC protocols may store the frame for * later which is not a dropped frame. */ - if (dev->port_usb->supports_multi_frame) + if (dev->port_usb && + dev->port_usb->supports_multi_frame) goto multiframe; goto drop; } -- cgit v0.10.2 From cee51c33f52ebf673a088a428ac0fecc33ab77fa Mon Sep 17 00:00:00 2001 From: Winter Wang Date: Wed, 27 Jul 2016 10:03:19 +0800 Subject: usb: gadget: configfs: add mutex lock before unregister gadget There may be a race condition if f_fs calls unregister_gadget_item in ffs_closed() when unregister_gadget is called by UDC store at the same time. this leads to a kernel NULL pointer dereference: [ 310.644928] Unable to handle kernel NULL pointer dereference at virtual address 00000004 [ 310.645053] init: Service 'adbd' is being killed... [ 310.658938] pgd = c9528000 [ 310.662515] [00000004] *pgd=19451831, *pte=00000000, *ppte=00000000 [ 310.669702] Internal error: Oops: 817 [#1] PREEMPT SMP ARM [ 310.675211] Modules linked in: [ 310.678294] CPU: 0 PID: 1537 Comm: ->transport Not tainted 4.1.15-03725-g793404c #2 [ 310.685958] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [ 310.692493] task: c8e24200 ti: c945e000 task.ti: c945e000 [ 310.697911] PC is at usb_gadget_unregister_driver+0xb4/0xd0 [ 310.703502] LR is at __mutex_lock_slowpath+0x10c/0x16c [ 310.708648] pc : [] lr : [] psr: 600f0113 [ 311.565585] [] (usb_gadget_unregister_driver) from [] (unregister_gadget_item+0x1c/0x34) [ 311.575426] [] (unregister_gadget_item) from [] (ffs_closed+0x8c/0x9c) [ 311.583702] [] (ffs_closed) from [] (ffs_data_reset+0xc/0xa0) [ 311.591194] [] (ffs_data_reset) from [] (ffs_data_closed+0x90/0xd0) [ 311.599208] [] (ffs_data_closed) from [] (ffs_ep0_release+0xc/0x14) [ 311.607224] [] (ffs_ep0_release) from [] (__fput+0x80/0x1d0) [ 311.614635] [] (__fput) from [] (task_work_run+0xb0/0xe8) [ 311.621788] [] (task_work_run) from [] (do_work_pending+0x7c/0xa4) [ 311.629718] [] (do_work_pending) from [] (work_pending+0xc/0x20) for functions using functionFS, i.e. android adbd will close /dev/usb-ffs/adb/ep0 when usb IO thread fails, but switch adb from on to off also triggers write "none" > UDC. These 2 operations both call unregister_gadget, which will lead to the panic above. add a mutex before calling unregister_gadget for api used in f_fs. Signed-off-by: Winter Wang Signed-off-by: Felipe Balbi diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 70cf347..f9237fe 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1490,7 +1490,9 @@ void unregister_gadget_item(struct config_item *item) { struct gadget_info *gi = to_gadget_info(item); + mutex_lock(&gi->lock); unregister_gadget(gi); + mutex_unlock(&gi->lock); } EXPORT_SYMBOL_GPL(unregister_gadget_item); -- cgit v0.10.2 From ec57fcd042a9448096705847290d4e4e97fbb7e1 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 26 Jul 2016 14:48:39 +0000 Subject: usb: phy: omap-otg: Fix missing platform_set_drvdata() in omap_otg_probe() Add missing platform_set_drvdata() in omap_otg_probe(), otherwise calling platform_get_drvdata() in remove returns NULL. This is detected by Coccinelle semantic patch. Signed-off-by: Wei Yongjun Signed-off-by: Felipe Balbi diff --git a/drivers/usb/phy/phy-omap-otg.c b/drivers/usb/phy/phy-omap-otg.c index 6f6d2a7..6523af4 100644 --- a/drivers/usb/phy/phy-omap-otg.c +++ b/drivers/usb/phy/phy-omap-otg.c @@ -140,6 +140,8 @@ static int omap_otg_probe(struct platform_device *pdev) (rev >> 4) & 0xf, rev & 0xf, config->extcon, otg_dev->id, otg_dev->vbus); + platform_set_drvdata(pdev, otg_dev); + return 0; } -- cgit v0.10.2 From 4c4f106c032ff32b89c98a4d819e68e6e643c14e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 26 Jul 2016 14:47:00 +0000 Subject: usb: dwc3: fix missing platform_set_drvdata() in dwc3_of_simple_probe() Add missing platform_set_drvdata() in dwc3_of_simple_probe(), otherwise calling platform_get_drvdata() in remove returns NULL. This is detected by Coccinelle semantic patch. Signed-off-by: Wei Yongjun Signed-off-by: Felipe Balbi diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c index 9743353..e56d59b 100644 --- a/drivers/usb/dwc3/dwc3-of-simple.c +++ b/drivers/usb/dwc3/dwc3-of-simple.c @@ -61,6 +61,7 @@ static int dwc3_of_simple_probe(struct platform_device *pdev) if (!simple->clks) return -ENOMEM; + platform_set_drvdata(pdev, simple); simple->dev = dev; for (i = 0; i < simple->num_clocks; i++) { -- cgit v0.10.2 From 528d28138f91009f230903bd89ccd44719667831 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 26 Jul 2016 16:01:30 +0800 Subject: usb: misc: usbtest: usbtest_do_ioctl may return positive integer For case 14 and case 21, their correct return value is the number of bytes transferred, so it is a positive integer. But in usbtest_ioctl, it takes non-zero as false return value for usbtest_do_ioctl, so it will treat the correct test as wrong test, then the time on tests will be the minus value. Signed-off-by: Peter Chen Cc: stable Fixes: 18fc4ebdc705 ("usb: misc: usbtest: Remove timeval usage") Signed-off-by: Felipe Balbi diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 6b978f0..5e3464e 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -2602,7 +2602,7 @@ usbtest_ioctl(struct usb_interface *intf, unsigned int code, void *buf) ktime_get_ts64(&start); retval = usbtest_do_ioctl(intf, param_32); - if (retval) + if (retval < 0) goto free_mutex; ktime_get_ts64(&end); -- cgit v0.10.2 From 63196e989699ddffb4e3d30ca8c3567d408820f4 Mon Sep 17 00:00:00 2001 From: Binyamin Sharet Date: Thu, 7 Jul 2016 22:22:04 +0300 Subject: usb: gadget: fix check in sync read from ep in gadgetfs When reading synchronously from a non-zero endpoint, gadgetfs will return -EFAULT even if the read succeeds, due to a bad check of the copy_to_iter() return value. This fix compares the return value of copy_to_iter to the amount of bytes that was passed, and only fails if they are not the same. Signed-off-by: Binyamin Sharet Signed-off-by: Felipe Balbi diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index aa3707b..8560f2f 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -606,7 +606,7 @@ ep_read_iter(struct kiocb *iocb, struct iov_iter *to) } if (is_sync_kiocb(iocb)) { value = ep_io(epdata, buf, len); - if (value >= 0 && copy_to_iter(buf, value, to)) + if (value >= 0 && (copy_to_iter(buf, value, to) != value)) value = -EFAULT; } else { struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL); -- cgit v0.10.2 From bd610c5aa9fcc9817d2629274a27aab81aa77cec Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 16 Jul 2016 09:04:40 +0200 Subject: usb: gadget: uvc: Fix return value in case of error If this memory allocation fail, we will return 0, which means success. Return -ENOMEM instead. Reviewed-by: Laurent Pinchart Signed-off-by: Christophe JAILLET Signed-off-by: Felipe Balbi diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 66753ba..31125a4 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -2023,7 +2023,7 @@ static int uvcg_streaming_class_allow_link(struct config_item *src, if (!data) { kfree(*class_array); *class_array = NULL; - ret = PTR_ERR(data); + ret = -ENOMEM; goto unlock; } cl_arr = *class_array; -- cgit v0.10.2 From 3887db5c2b6531c59e9649a4293071b575d6eb3b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 16 Jul 2016 08:34:33 +0200 Subject: usb: gadget: composite: Fix return value in case of error In 'composite_os_desc_req_prepare', if one of the memory allocations fail, 0 will be returned, which means success. We should return -ENOMEM instead. Signed-off-by: Christophe JAILLET Signed-off-by: Felipe Balbi diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index a8ecc7a..5ebe6af7 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2126,14 +2126,14 @@ int composite_os_desc_req_prepare(struct usb_composite_dev *cdev, cdev->os_desc_req = usb_ep_alloc_request(ep0, GFP_KERNEL); if (!cdev->os_desc_req) { - ret = PTR_ERR(cdev->os_desc_req); + ret = -ENOMEM; goto end; } /* OS feature descriptor length <= 4kB */ cdev->os_desc_req->buf = kmalloc(4096, GFP_KERNEL); if (!cdev->os_desc_req->buf) { - ret = PTR_ERR(cdev->os_desc_req->buf); + ret = -ENOMEM; kfree(cdev->os_desc_req); goto end; } -- cgit v0.10.2 From 327b21da884fe1a29f733e41792ddd53e4a30379 Mon Sep 17 00:00:00 2001 From: Mathieu Laurendeau Date: Fri, 15 Jul 2016 14:58:41 +0200 Subject: usb/gadget: fix gadgetfs aio support. Fix io submissions failing with ENODEV. Signed-off-by: Mathieu Laurendeau Fixes: 7fe3976e0f3a ("gadget: switch ep_io_operations to ->read_iter/->write_iter") Signed-off-by: Felipe Balbi diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 8560f2f..16104b5e 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -542,7 +542,7 @@ static ssize_t ep_aio(struct kiocb *iocb, */ spin_lock_irq(&epdata->dev->lock); value = -ENODEV; - if (unlikely(epdata->ep)) + if (unlikely(epdata->ep == NULL)) goto fail; req = usb_ep_alloc_request(epdata->ep, GFP_ATOMIC); -- cgit v0.10.2 From 7442e6db5bdd0dce4615205508301f9b22e502d6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Jul 2016 13:14:33 +0300 Subject: usb: gadget: fsl_qe_udc: off by one in setup_received_handle() The udc->eps[] array has USB_MAX_ENDPOINTS elements so > should be >=. Fixes: 3948f0e0c999 ('usb: add Freescale QE/CPM USB peripheral controller driver') Acked-by: Peter Chen Signed-off-by: Dan Carpenter Signed-off-by: Felipe Balbi diff --git a/drivers/usb/gadget/udc/fsl_qe_udc.c b/drivers/usb/gadget/udc/fsl_qe_udc.c index 93d28cb..cf8819a 100644 --- a/drivers/usb/gadget/udc/fsl_qe_udc.c +++ b/drivers/usb/gadget/udc/fsl_qe_udc.c @@ -2053,7 +2053,7 @@ static void setup_received_handle(struct qe_udc *udc, struct qe_ep *ep; if (wValue != 0 || wLength != 0 - || pipe > USB_MAX_ENDPOINTS) + || pipe >= USB_MAX_ENDPOINTS) break; ep = &udc->eps[pipe]; -- cgit v0.10.2 From 17a1dc5e22d25ba374d8c8dd8d5bf10bd20d3265 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Mon, 11 Jul 2016 09:58:16 +0800 Subject: usb: udc: core: fix error handling The udc device needs to be deleted if error occurs Fixes: 855ed04a3758 ("usb: gadget: udc-core: independent registration of gadgets and gadget drivers") Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index ff8685e..934f838 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1145,7 +1145,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, if (ret != -EPROBE_DEFER) list_del(&driver->pending); if (ret) - goto err4; + goto err5; break; } } @@ -1154,6 +1154,9 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, return 0; +err5: + device_del(&udc->dev); + err4: list_del(&udc->list); mutex_unlock(&udc_lock); -- cgit v0.10.2 From 207707d8fd48ebc977fb2b2794004a020e1ee08e Mon Sep 17 00:00:00 2001 From: Xerox Lin Date: Wed, 29 Jun 2016 14:34:21 +0530 Subject: usb: gadget: rndis: free response queue during REMOTE_NDIS_RESET_MSG When rndis data transfer is in progress, some Windows7 Host PC is not sending the GET_ENCAPSULATED_RESPONSE command for receiving the response for the previous SEND_ENCAPSULATED_COMMAND processed. The rndis function driver appends each response for the SEND_ENCAPSULATED_COMMAND in a queue. As the above process got corrupted, the Host sends a REMOTE_NDIS_RESET_MSG command to do a soft-reset. As the rndis response queue is not freed, the previous response is sent as a part of this REMOTE_NDIS_RESET_MSG's reset response and the Host block any more Rndis transfers. Hence free the rndis response queue as a part of this soft-reset so that the correct response for REMOTE_NDIS_RESET_MSG is sent properly during the response command. Signed-off-by: Rajkumar Raghupathy Signed-off-by: Xerox Lin [AmitP: Cherry-picked this patch and folded other relevant fixes from Android common kernel android-4.4] Signed-off-by: Amit Pundir Signed-off-by: Felipe Balbi diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c index 943c21a..ab6ac1b 100644 --- a/drivers/usb/gadget/function/rndis.c +++ b/drivers/usb/gadget/function/rndis.c @@ -680,6 +680,12 @@ static int rndis_reset_response(struct rndis_params *params, { rndis_reset_cmplt_type *resp; rndis_resp_t *r; + u8 *xbuf; + u32 length; + + /* drain the response queue */ + while ((xbuf = rndis_get_next_response(params, &length))) + rndis_free_response(params, xbuf); r = rndis_add_response(params, sizeof(rndis_reset_cmplt_type)); if (!r) -- cgit v0.10.2 From 79d17482a4091056e128a5048e253f2ed53440cc Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Thu, 7 Jul 2016 08:39:09 +0200 Subject: usb: dwc3: don't set last bit for ISOC endpoints According to Synopsys Databook 2.60a, section 8.3.4, it's stated that: The LST bit should be set to 0 (isochronous transfers normally continue until the endpoint is removed entirely, at which time an End Transfer command is used to stop the transfer). This patch makes sure that detail is observed and fixes a regression with Android Audio playback caused by recent changes to DWC3 gadget. Signed-off-by: Janusz Dziedzic Signed-off-by: Felipe Balbi diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index eb820e4..7a4d4d2 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -829,7 +829,7 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep, if (!req->request.no_interrupt && !chain) trb->ctrl |= DWC3_TRB_CTRL_IOC | DWC3_TRB_CTRL_ISP_IMI; - if (last) + if (last && !usb_endpoint_xfer_isoc(dep->endpoint.desc)) trb->ctrl |= DWC3_TRB_CTRL_LST; if (chain) -- cgit v0.10.2 From a0ad85ae866f8a01f29a18ffa1e9b1aa8ca888bd Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 10 Aug 2016 18:07:46 +0300 Subject: usb: dwc3: gadget: stop processing on HWO set stop consuming TRBs when we reach one with HWO bit already set. This will prevent us from prematurely retiring a TRB. Signed-off-by: Felipe Balbi diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 7a4d4d2..1f5597e 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1979,16 +1979,7 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, trb->ctrl &= ~DWC3_TRB_CTRL_HWO; if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN) - /* - * We continue despite the error. There is not much we - * can do. If we don't clean it up we loop forever. If - * we skip the TRB then it gets overwritten after a - * while since we use them in a ring buffer. A BUG() - * would help. Lets hope that if this occurs, someone - * fixes the root cause instead of looking away :) - */ - dev_err(dwc->dev, "%s's TRB (%p) still owned by HW\n", - dep->name, trb); + return 1; count = trb->size & DWC3_TRB_SIZE_MASK; -- cgit v0.10.2 From 70fcad495b9903d362bdbf9ffba23259294064c8 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 11 Aug 2016 14:39:00 +0100 Subject: ASoC: Fix leak of rtd in soc_bind_dai_link If we fail to find a platform we simply return EPROBE_DEFER, but we have allocated the rtd pointer. All error paths before soc_add_pcm_runtime need to call soc_free_pcm_runtime first to avoid leaking the rtd pointer. A suitable error path already exists and is used else where in the function so simply use that here as well. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 16369ca..b0e23db 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1056,7 +1056,7 @@ static int soc_bind_dai_link(struct snd_soc_card *card, if (!rtd->platform) { dev_err(card->dev, "ASoC: platform %s not registered\n", dai_link->platform_name); - return -EPROBE_DEFER; + goto _err_defer; } soc_add_pcm_runtime(card, rtd); -- cgit v0.10.2 From fa54aade5338937146bb6b8af93d2c27f6787ae1 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 11 Aug 2016 14:40:04 +0100 Subject: ASoC: wm2000: Fix return of uninitialised varible Anything that sets ret in wm2000_anc_transition will have immediately returned anyway as such we will always return an uninitialised ret at the bottom of the function. Simply replace the return with a return 0; Signed-off-by: Charles Keepax Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c index a67ea10..f266439 100644 --- a/sound/soc/codecs/wm2000.c +++ b/sound/soc/codecs/wm2000.c @@ -581,7 +581,7 @@ static int wm2000_anc_transition(struct wm2000_priv *wm2000, if (anc_transitions[i].dest == ANC_OFF) clk_disable_unprepare(wm2000->mclk); - return ret; + return 0; } static int wm2000_anc_set_mode(struct wm2000_priv *wm2000) -- cgit v0.10.2 From d0141191a20289f8955c1e03dad08e42e6f71ca9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 11 Aug 2016 11:50:30 -0400 Subject: ext4: fix xattr shifting when expanding inodes The code in ext4_expand_extra_isize_ea() treated new_extra_isize argument sometimes as the desired target i_extra_isize and sometimes as the amount by which we need to grow current i_extra_isize. These happen to coincide when i_extra_isize is 0 which used to be the common case and so nobody noticed this until recently when we added i_projid to the inode and so i_extra_isize now needs to grow from 28 to 32 bytes. The result of these bugs was that we sometimes unnecessarily decided to move xattrs out of inode even if there was enough space and we often ended up corrupting in-inode xattrs because arguments to ext4_xattr_shift_entries() were just wrong. This could demonstrate itself as BUG_ON in ext4_xattr_shift_entries() triggering. Fix the problem by introducing new isize_diff variable and use it where appropriate. CC: stable@vger.kernel.org # 4.4.x Reported-by: Dave Chinner Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 39e9cfb..cb1d7b4 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1353,11 +1353,13 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, size_t min_offs, free; int total_ino; void *base, *start, *end; - int extra_isize = 0, error = 0, tried_min_extra_isize = 0; + int error = 0, tried_min_extra_isize = 0; int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); + int isize_diff; /* How much do we need to grow i_extra_isize */ down_write(&EXT4_I(inode)->xattr_sem); retry: + isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize; if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) { up_write(&EXT4_I(inode)->xattr_sem); return 0; @@ -1382,7 +1384,7 @@ retry: goto cleanup; free = ext4_xattr_free_space(last, &min_offs, base, &total_ino); - if (free >= new_extra_isize) { + if (free >= isize_diff) { entry = IFIRST(header); ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize - new_extra_isize, (void *)raw_inode + @@ -1414,7 +1416,7 @@ retry: end = bh->b_data + bh->b_size; min_offs = end - base; free = ext4_xattr_free_space(first, &min_offs, base, NULL); - if (free < new_extra_isize) { + if (free < isize_diff) { if (!tried_min_extra_isize && s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; @@ -1428,7 +1430,7 @@ retry: free = inode->i_sb->s_blocksize; } - while (new_extra_isize > 0) { + while (isize_diff > 0) { size_t offs, size, entry_size; struct ext4_xattr_entry *small_entry = NULL; struct ext4_xattr_info i = { @@ -1459,7 +1461,7 @@ retry: EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + EXT4_XATTR_LEN(last->e_name_len); if (total_size <= free && total_size < min_total_size) { - if (total_size < new_extra_isize) { + if (total_size < isize_diff) { small_entry = last; } else { entry = last; @@ -1516,20 +1518,19 @@ retry: goto cleanup; entry = IFIRST(header); - if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize) - shift_bytes = new_extra_isize; + if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff) + shift_bytes = isize_diff; else shift_bytes = entry_size + size; /* Adjust the offsets and shift the remaining entries ahead */ - ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize - - shift_bytes, (void *)raw_inode + - EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes, + ext4_xattr_shift_entries(entry, -shift_bytes, + (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + + EXT4_I(inode)->i_extra_isize + shift_bytes, (void *)header, total_ino - entry_size, inode->i_sb->s_blocksize); - extra_isize += shift_bytes; - new_extra_isize -= shift_bytes; - EXT4_I(inode)->i_extra_isize = extra_isize; + isize_diff -= shift_bytes; + EXT4_I(inode)->i_extra_isize += shift_bytes; i.name = b_entry_name; i.value = buffer; -- cgit v0.10.2 From 418c12d08dc64a45107c467ec1ba29b5e69b0715 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 11 Aug 2016 11:58:32 -0400 Subject: ext4: fix xattr shifting when expanding inodes part 2 When multiple xattrs need to be moved out of inode, we did not properly recompute total size of xattr headers in the inode and the new header position. Thus when moving the second and further xattr we asked ext4_xattr_shift_entries() to move too much and from the wrong place, resulting in possible xattr value corruption or general memory corruption. CC: stable@vger.kernel.org # 4.4.x Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index cb1d7b4..b18b1ff 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1516,6 +1516,7 @@ retry: error = ext4_xattr_ibody_set(handle, inode, &i, is); if (error) goto cleanup; + total_ino -= entry_size; entry = IFIRST(header); if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff) @@ -1526,11 +1527,11 @@ retry: ext4_xattr_shift_entries(entry, -shift_bytes, (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize + shift_bytes, - (void *)header, total_ino - entry_size, - inode->i_sb->s_blocksize); + (void *)header, total_ino, inode->i_sb->s_blocksize); isize_diff -= shift_bytes; EXT4_I(inode)->i_extra_isize += shift_bytes; + header = IHDR(inode, raw_inode); i.name = b_entry_name; i.value = buffer; -- cgit v0.10.2 From 443a8c41cd49de66a3fda45b32b9860ea0292b84 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 11 Aug 2016 12:00:01 -0400 Subject: ext4: properly align shifted xattrs when expanding inodes We did not count with the padding of xattr value when computing desired shift of xattrs in the inode when expanding i_extra_isize. As a result we could create unaligned start of inline xattrs. Account for alignment properly. CC: stable@vger.kernel.org # 4.4.x- Signed-off-by: Jan Kara diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index b18b1ff..c893f00 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1522,7 +1522,7 @@ retry: if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff) shift_bytes = isize_diff; else - shift_bytes = entry_size + size; + shift_bytes = entry_size + EXT4_XATTR_SIZE(size); /* Adjust the offsets and shift the remaining entries ahead */ ext4_xattr_shift_entries(entry, -shift_bytes, (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + -- cgit v0.10.2 From 539587511835ea12d8daa444cbed766cf2bc3612 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 11 Aug 2016 10:31:14 +0800 Subject: usb: misc: usbtest: add fix for driver hang In sg_timeout(), req->status is set to "-ETIMEDOUT" before calling into usb_sg_cancel(). usb_sg_cancel() will do nothing and return directly if req->status has been set to a non-zero value. This will cause driver hang whenever transfer time out is triggered. This patch fixes this issue. It could be backported to stable kernel with version later than v3.15. Cc: stable@vger.kernel.org # 3.15+ Cc: Alan Stern Signed-off-by: Lu Baolu Suggested-by: Alan Stern Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 5e3464e..5c8210d 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -585,7 +585,6 @@ static void sg_timeout(unsigned long _req) { struct usb_sg_request *req = (struct usb_sg_request *) _req; - req->status = -ETIMEDOUT; usb_sg_cancel(req); } @@ -616,8 +615,10 @@ static int perform_sglist( mod_timer(&sg_timer, jiffies + msecs_to_jiffies(SIMPLE_IO_TIMEOUT)); usb_sg_wait(req); - del_timer_sync(&sg_timer); - retval = req->status; + if (!del_timer_sync(&sg_timer)) + retval = -ETIMEDOUT; + else + retval = req->status; /* FIXME check resulting data pattern */ -- cgit v0.10.2 From 2e81a4eeedcaa66e35f58b81e0755b87057ce392 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 11 Aug 2016 12:38:55 -0400 Subject: ext4: avoid deadlock when expanding inode size When we need to move xattrs into external xattr block, we call ext4_xattr_block_set() from ext4_expand_extra_isize_ea(). That may end up calling ext4_mark_inode_dirty() again which will recurse back into the inode expansion code leading to deadlocks. Protect from recursion using EXT4_STATE_NO_EXPAND inode flag and move its management into ext4_expand_extra_isize_ea() since its manipulation is safe there (due to xattr_sem) from possible races with ext4_xattr_set_handle() which plays with it as well. CC: stable@vger.kernel.org # 4.4.x Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5a6277d..13c95b2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5466,8 +5466,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) sbi->s_want_extra_isize, iloc, handle); if (ret) { - ext4_set_inode_state(inode, - EXT4_STATE_NO_EXPAND); if (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count)) { ext4_warning(inode->i_sb, diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index c893f00..2eb935c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1358,12 +1358,14 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, int isize_diff; /* How much do we need to grow i_extra_isize */ down_write(&EXT4_I(inode)->xattr_sem); + /* + * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty + */ + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); retry: isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize; - if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) { - up_write(&EXT4_I(inode)->xattr_sem); - return 0; - } + if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) + goto out; header = IHDR(inode, raw_inode); entry = IFIRST(header); @@ -1392,8 +1394,7 @@ retry: (void *)header, total_ino, inode->i_sb->s_blocksize); EXT4_I(inode)->i_extra_isize = new_extra_isize; - error = 0; - goto cleanup; + goto out; } /* @@ -1553,6 +1554,8 @@ retry: kfree(bs); } brelse(bh); +out: + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); up_write(&EXT4_I(inode)->xattr_sem); return 0; @@ -1564,6 +1567,10 @@ cleanup: kfree(is); kfree(bs); brelse(bh); + /* + * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode + * size expansion failed. + */ up_write(&EXT4_I(inode)->xattr_sem); return error; } -- cgit v0.10.2 From 104a493390940e85fb7c840a9fd5214aba5cb3bd Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 11 Aug 2016 18:15:56 +0800 Subject: macvtap: fix use after free for skb_array during release We've clean skb_array in macvtap_put_queue() but still try to pop from it during macvtap_sock_destruct(). Fix this use after free by moving the skb array cleanup to macvtap_sock_destruct() instead. Fixes: 362899b8725b ("macvtap: switch to use skb array") Reported-by: Cornelia Huck Tested-by: Cornelia Huck Signed-off-by: Jason Wang Signed-off-by: David S. Miller diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index a38c0da..070e329 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -275,7 +275,6 @@ static void macvtap_put_queue(struct macvtap_queue *q) rtnl_unlock(); synchronize_rcu(); - skb_array_cleanup(&q->skb_array); sock_put(&q->sk); } @@ -533,10 +532,8 @@ static void macvtap_sock_write_space(struct sock *sk) static void macvtap_sock_destruct(struct sock *sk) { struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk); - struct sk_buff *skb; - while ((skb = skb_array_consume(&q->skb_array)) != NULL) - kfree_skb(skb); + skb_array_cleanup(&q->skb_array); } static int macvtap_open(struct inode *inode, struct file *file) -- cgit v0.10.2 From bbe11fab0b6c1d113776b2898e085bf4d1fdc607 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 11 Aug 2016 15:24:27 +0200 Subject: macsec: use after free when deleting the underlying device macsec_notify() loops over the list of macsec devices configured on the underlying device when this device is being removed. This list is part of the rx_handler data. However, macsec_dellink unregisters the rx_handler and frees the rx_handler data when the last macsec device is removed from the underlying device. Add macsec_common_dellink() to delete macsec devices without unregistering the rx_handler and freeing the associated data. Fixes: 960d5848dbf1 ("macsec: fix memory leaks around rx_handler (un)registration") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index d13e6e1..dbd590a 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3047,22 +3047,29 @@ static void macsec_del_dev(struct macsec_dev *macsec) } } +static void macsec_common_dellink(struct net_device *dev, struct list_head *head) +{ + struct macsec_dev *macsec = macsec_priv(dev); + + unregister_netdevice_queue(dev, head); + list_del_rcu(&macsec->secys); + macsec_del_dev(macsec); + + macsec_generation++; +} + static void macsec_dellink(struct net_device *dev, struct list_head *head) { struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev); - macsec_generation++; + macsec_common_dellink(dev, head); - unregister_netdevice_queue(dev, head); - list_del_rcu(&macsec->secys); if (list_empty(&rxd->secys)) { netdev_rx_handler_unregister(real_dev); kfree(rxd); } - - macsec_del_dev(macsec); } static int register_macsec_dev(struct net_device *real_dev, @@ -3382,8 +3389,12 @@ static int macsec_notify(struct notifier_block *this, unsigned long event, rxd = macsec_data_rtnl(real_dev); list_for_each_entry_safe(m, n, &rxd->secys, secys) { - macsec_dellink(m->secy.netdev, &head); + macsec_common_dellink(m->secy.netdev, &head); } + + netdev_rx_handler_unregister(real_dev); + kfree(rxd); + unregister_netdevice_many(&head); break; } -- cgit v0.10.2 From 9b4b3f6a062b22550e62523efe5213776cdd426b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Aug 2016 07:26:01 -0700 Subject: ahci: disable correct irq for dummy ports irq already contains the interrupt number for the port, don't add the port index to it. Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo Fixes: d684a90d38e2 ("ahci: per-port msix support") Cc: stable@vger.kernel.org v4.5+ diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 7461a58..dcf2c72 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -2524,7 +2524,7 @@ static int ahci_host_activate_multi_irqs(struct ata_host *host, /* Do not receive interrupts sent by dummy ports */ if (!pp) { - disable_irq(irq + i); + disable_irq(irq); continue; } -- cgit v0.10.2 From a3f457d9636b3f5ae4fc6502cb0c95f60f5e342b Mon Sep 17 00:00:00 2001 From: Chris Zhong Date: Tue, 9 Aug 2016 11:02:33 -0700 Subject: clk: rockchip: fix rk3399 aclk_vio gate bit Fix incorrect rk3399 aclk_vio gating bit, it should be 0, not 10. Fixes: 115510053e5e ("clk: rockchip: add clock controller for the RK3399") Signed-off-by: Chris Zhong Reviewed-by: Xing Zheng Reviewed-by: Guenter Roeck Signed-off-by: Heiko Stuebner diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c index c109d80..314eab6 100644 --- a/drivers/clk/rockchip/clk-rk3399.c +++ b/drivers/clk/rockchip/clk-rk3399.c @@ -1071,7 +1071,7 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = { /* vio */ COMPOSITE(ACLK_VIO, "aclk_vio", mux_pll_src_cpll_gpll_ppll_p, CLK_IGNORE_UNUSED, RK3399_CLKSEL_CON(42), 6, 2, MFLAGS, 0, 5, DFLAGS, - RK3399_CLKGATE_CON(11), 10, GFLAGS), + RK3399_CLKGATE_CON(11), 0, GFLAGS), COMPOSITE_NOMUX(PCLK_VIO, "pclk_vio", "aclk_vio", 0, RK3399_CLKSEL_CON(43), 0, 5, DFLAGS, RK3399_CLKGATE_CON(11), 1, GFLAGS), -- cgit v0.10.2 From e0cb1b84163720ec67ff0e54397fd3f57ad4a4dd Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 10 Aug 2016 09:29:43 +0200 Subject: clk: renesas: r8a7795: Fix SD clocks According to the datasheet, SDn clocks are from the SDSRC clock. And the SDSRC has a 1/2 divider. So, we should have ".sdsrc" as an internal core clock. Otherwise, since the sdhi driver will calculate clock for a sd card using the wrong parent clock rate, and then performance will be not good. Fixes: 90c073e53909da85 ("clk: shmobile: r8a7795: Add SD divider support") Signed-off-by: Yoshihiro Shimoda Acked-by: Dirk Behme Tested-by: Wolfram Sang Signed-off-by: Geert Uytterhoeven Cc: stable@vger.kernel.org Signed-off-by: Stephen Boyd diff --git a/drivers/clk/renesas/r8a7795-cpg-mssr.c b/drivers/clk/renesas/r8a7795-cpg-mssr.c index d359c92..e38bf60 100644 --- a/drivers/clk/renesas/r8a7795-cpg-mssr.c +++ b/drivers/clk/renesas/r8a7795-cpg-mssr.c @@ -69,6 +69,7 @@ static const struct cpg_core_clk r8a7795_core_clks[] __initconst = { DEF_FIXED(".s1", CLK_S1, CLK_PLL1_DIV2, 3, 1), DEF_FIXED(".s2", CLK_S2, CLK_PLL1_DIV2, 4, 1), DEF_FIXED(".s3", CLK_S3, CLK_PLL1_DIV2, 6, 1), + DEF_FIXED(".sdsrc", CLK_SDSRC, CLK_PLL1_DIV2, 2, 1), /* Core Clock Outputs */ DEF_FIXED("ztr", R8A7795_CLK_ZTR, CLK_PLL1_DIV2, 6, 1), @@ -87,10 +88,10 @@ static const struct cpg_core_clk r8a7795_core_clks[] __initconst = { DEF_FIXED("s3d2", R8A7795_CLK_S3D2, CLK_S3, 2, 1), DEF_FIXED("s3d4", R8A7795_CLK_S3D4, CLK_S3, 4, 1), - DEF_GEN3_SD("sd0", R8A7795_CLK_SD0, CLK_PLL1_DIV2, 0x0074), - DEF_GEN3_SD("sd1", R8A7795_CLK_SD1, CLK_PLL1_DIV2, 0x0078), - DEF_GEN3_SD("sd2", R8A7795_CLK_SD2, CLK_PLL1_DIV2, 0x0268), - DEF_GEN3_SD("sd3", R8A7795_CLK_SD3, CLK_PLL1_DIV2, 0x026c), + DEF_GEN3_SD("sd0", R8A7795_CLK_SD0, CLK_SDSRC, 0x0074), + DEF_GEN3_SD("sd1", R8A7795_CLK_SD1, CLK_SDSRC, 0x0078), + DEF_GEN3_SD("sd2", R8A7795_CLK_SD2, CLK_SDSRC, 0x0268), + DEF_GEN3_SD("sd3", R8A7795_CLK_SD3, CLK_SDSRC, 0x026c), DEF_FIXED("cl", R8A7795_CLK_CL, CLK_PLL1_DIV2, 48, 1), DEF_FIXED("cp", R8A7795_CLK_CP, CLK_EXTAL, 2, 1), -- cgit v0.10.2 From aa8b187eeab5e6757e959b81889ddbfda7d02ad1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 9 Aug 2016 22:09:19 -0700 Subject: hwmon: (it87) Features mask must be 32 bit wide Coverity reports: result_independent_of_operands: data->features & (65536UL /* 1UL << 16 */) is always 0 regardless of the values of its operands. This occurs as the logical operand of if. data->features needs to be 32 bit wide since there are more than 16 features. Fixes: cc18da79d9b7 ("hwmon: (it87) Support up to 6 temperature sensors ... "); Signed-off-by: Guenter Roeck diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index 730d840..d0203a1 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -491,7 +491,7 @@ struct it87_sio_data { struct it87_data { const struct attribute_group *groups[7]; enum chips type; - u16 features; + u32 features; u8 peci_mask; u8 old_peci_mask; -- cgit v0.10.2 From 20c389e656a89e2302017bf3f499cb5a31a2a7ba Mon Sep 17 00:00:00 2001 From: Xing Zheng Date: Tue, 2 Aug 2016 15:19:58 +0800 Subject: clk: rockchip: fix incorrect aclk_emmc source gate bits on rk3399 Dues to incorrect diagram, we need to fix incorrect bits for (c/g)pll_aclk_emmc_src: cpll_aclk_emmc_src --> G6[13] gpll_aclk_emmc_src --> G6[12] Fixes: 115510053e5e ("clk: rockchip: add clock controller for the RK3399") Signed-off-by: Xing Zheng Reviewed-by: Shawn Lin Signed-off-by: Heiko Stuebner diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c index 314eab6..01fa60e 100644 --- a/drivers/clk/rockchip/clk-rk3399.c +++ b/drivers/clk/rockchip/clk-rk3399.c @@ -923,9 +923,9 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = { RK3399_CLKGATE_CON(6), 14, GFLAGS), GATE(0, "cpll_aclk_emmc_src", "cpll", CLK_IGNORE_UNUSED, - RK3399_CLKGATE_CON(6), 12, GFLAGS), - GATE(0, "gpll_aclk_emmc_src", "gpll", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(6), 13, GFLAGS), + GATE(0, "gpll_aclk_emmc_src", "gpll", CLK_IGNORE_UNUSED, + RK3399_CLKGATE_CON(6), 12, GFLAGS), COMPOSITE_NOGATE(ACLK_EMMC, "aclk_emmc", mux_aclk_emmc_p, CLK_IGNORE_UNUSED, RK3399_CLKSEL_CON(21), 7, 1, MFLAGS, 0, 5, DFLAGS), GATE(ACLK_EMMC_CORE, "aclk_emmccore", "aclk_emmc", CLK_IGNORE_UNUSED, -- cgit v0.10.2 From b33ecca87df99fa6fff8a1d455de96f436934dcf Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 8 Aug 2016 21:55:39 +0200 Subject: phy-sun4i-usb: Add support for peripheral-only mode Use the new of_usb_get_dr_mode_by_phy() function to get the dr_mode from the musb controller node instead of assuming that having an id_det gpio means otg mode, and not having one means host mode. Implement peripheral-only mode by adding a sun4i_usb_phy0_get_id_det helper which looks at the dr_mode, always registering our extcon and always monitoring vbus. If dr_mode is not specified in the dts, do not register phy0 as we then do not know how to treat it. This is actually a good thing as this means we will not be registering phy0 on devices where the otg controller is not enabled in the devicetree. Signed-off-by: Hans de Goede Acked-by: Kishon Vijay Abraham I Signed-off-by: Kishon Vijay Abraham I diff --git a/drivers/phy/phy-sun4i-usb.c b/drivers/phy/phy-sun4i-usb.c index 0a45bc6..8c7eb33 100644 --- a/drivers/phy/phy-sun4i-usb.c +++ b/drivers/phy/phy-sun4i-usb.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #define REG_ISCR 0x00 @@ -110,6 +111,7 @@ struct sun4i_usb_phy_cfg { struct sun4i_usb_phy_data { void __iomem *base; const struct sun4i_usb_phy_cfg *cfg; + enum usb_dr_mode dr_mode; struct mutex mutex; struct sun4i_usb_phy { struct phy *phy; @@ -120,6 +122,7 @@ struct sun4i_usb_phy_data { bool regulator_on; int index; } phys[MAX_PHYS]; + int first_phy; /* phy0 / otg related variables */ struct extcon_dev *extcon; bool phy0_init; @@ -285,16 +288,10 @@ static int sun4i_usb_phy_init(struct phy *_phy) sun4i_usb_phy0_update_iscr(_phy, 0, ISCR_DPDM_PULLUP_EN); sun4i_usb_phy0_update_iscr(_phy, 0, ISCR_ID_PULLUP_EN); - if (data->id_det_gpio) { - /* OTG mode, force ISCR and cable state updates */ - data->id_det = -1; - data->vbus_det = -1; - queue_delayed_work(system_wq, &data->detect, 0); - } else { - /* Host only mode */ - sun4i_usb_phy0_set_id_detect(_phy, 0); - sun4i_usb_phy0_set_vbus_detect(_phy, 1); - } + /* Force ISCR and cable state updates */ + data->id_det = -1; + data->vbus_det = -1; + queue_delayed_work(system_wq, &data->detect, 0); } return 0; @@ -319,6 +316,19 @@ static int sun4i_usb_phy_exit(struct phy *_phy) return 0; } +static int sun4i_usb_phy0_get_id_det(struct sun4i_usb_phy_data *data) +{ + switch (data->dr_mode) { + case USB_DR_MODE_OTG: + return gpiod_get_value_cansleep(data->id_det_gpio); + case USB_DR_MODE_HOST: + return 0; + case USB_DR_MODE_PERIPHERAL: + default: + return 1; + } +} + static int sun4i_usb_phy0_get_vbus_det(struct sun4i_usb_phy_data *data) { if (data->vbus_det_gpio) @@ -432,7 +442,10 @@ static void sun4i_usb_phy0_id_vbus_det_scan(struct work_struct *work) struct phy *phy0 = data->phys[0].phy; int id_det, vbus_det, id_notify = 0, vbus_notify = 0; - id_det = gpiod_get_value_cansleep(data->id_det_gpio); + if (phy0 == NULL) + return; + + id_det = sun4i_usb_phy0_get_id_det(data); vbus_det = sun4i_usb_phy0_get_vbus_det(data); mutex_lock(&phy0->mutex); @@ -448,7 +461,8 @@ static void sun4i_usb_phy0_id_vbus_det_scan(struct work_struct *work) * without vbus detection report vbus low for long enough for * the musb-ip to end the current device session. */ - if (!sun4i_usb_phy0_have_vbus_det(data) && id_det == 0) { + if (data->dr_mode == USB_DR_MODE_OTG && + !sun4i_usb_phy0_have_vbus_det(data) && id_det == 0) { sun4i_usb_phy0_set_vbus_detect(phy0, 0); msleep(200); sun4i_usb_phy0_set_vbus_detect(phy0, 1); @@ -474,7 +488,8 @@ static void sun4i_usb_phy0_id_vbus_det_scan(struct work_struct *work) * without vbus detection report vbus low for long enough to * the musb-ip to end the current host session. */ - if (!sun4i_usb_phy0_have_vbus_det(data) && id_det == 1) { + if (data->dr_mode == USB_DR_MODE_OTG && + !sun4i_usb_phy0_have_vbus_det(data) && id_det == 1) { mutex_lock(&phy0->mutex); sun4i_usb_phy0_set_vbus_detect(phy0, 0); msleep(1000); @@ -519,7 +534,8 @@ static struct phy *sun4i_usb_phy_xlate(struct device *dev, { struct sun4i_usb_phy_data *data = dev_get_drvdata(dev); - if (args->args[0] >= data->cfg->num_phys) + if (args->args[0] < data->first_phy || + args->args[0] >= data->cfg->num_phys) return ERR_PTR(-ENODEV); return data->phys[args->args[0]].phy; @@ -593,13 +609,17 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev) return -EPROBE_DEFER; } - /* vbus_det without id_det makes no sense, and is not supported */ - if (sun4i_usb_phy0_have_vbus_det(data) && !data->id_det_gpio) { - dev_err(dev, "usb0_id_det missing or invalid\n"); - return -ENODEV; - } - - if (data->id_det_gpio) { + data->dr_mode = of_usb_get_dr_mode_by_phy(np, 0); + switch (data->dr_mode) { + case USB_DR_MODE_OTG: + /* otg without id_det makes no sense, and is not supported */ + if (!data->id_det_gpio) { + dev_err(dev, "usb0_id_det missing or invalid\n"); + return -ENODEV; + } + /* fall through */ + case USB_DR_MODE_HOST: + case USB_DR_MODE_PERIPHERAL: data->extcon = devm_extcon_dev_allocate(dev, sun4i_usb_phy0_cable); if (IS_ERR(data->extcon)) @@ -610,9 +630,13 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev) dev_err(dev, "failed to register extcon: %d\n", ret); return ret; } + break; + default: + dev_info(dev, "dr_mode unknown, not registering usb phy0\n"); + data->first_phy = 1; } - for (i = 0; i < data->cfg->num_phys; i++) { + for (i = data->first_phy; i < data->cfg->num_phys; i++) { struct sun4i_usb_phy *phy = data->phys + i; char name[16]; -- cgit v0.10.2 From 1766e7b3763a0707c2fda9689a7866dceed07b7a Mon Sep 17 00:00:00 2001 From: David Lechner Date: Wed, 10 Aug 2016 18:49:27 +0530 Subject: mfd: da8xx-cfgchip: New header file for CFGCHIP registers Create a new header file for TI DA8XX SoC CFGCHIPx registers. This will be used by a number of planned drivers including a new USB PHY driver and common clock framework drivers. The same defines *will* be removed from the platform_data header, once all the users start using the new syscon device header. This also fixes the following compiler error caused due to a dependent patch not merged. drivers/phy/phy-da8xx-usb.c:19:37: fatal error: linux/mfd/da8xx-cfgchip.h: No such file or directory #include Signed-off-by: David Lechner Acked-by: Lee Jones Reported-by: Arnd Bergmann Signed-off-by: Kishon Vijay Abraham I diff --git a/include/linux/mfd/da8xx-cfgchip.h b/include/linux/mfd/da8xx-cfgchip.h new file mode 100644 index 0000000..304985e --- /dev/null +++ b/include/linux/mfd/da8xx-cfgchip.h @@ -0,0 +1,153 @@ +/* + * TI DaVinci DA8xx CHIPCFGx registers for syscon consumers. + * + * Copyright (C) 2016 David Lechner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_MFD_DA8XX_CFGCHIP_H +#define __LINUX_MFD_DA8XX_CFGCHIP_H + +#include + +/* register offset (32-bit registers) */ +#define CFGCHIP(n) ((n) * 4) + +/* CFGCHIP0 (PLL0/EDMA3_0) register bits */ +#define CFGCHIP0_PLL_MASTER_LOCK BIT(4) +#define CFGCHIP0_EDMA30TC1DBS(n) ((n) << 2) +#define CFGCHIP0_EDMA30TC1DBS_MASK CFGCHIP0_EDMA30TC1DBS(0x3) +#define CFGCHIP0_EDMA30TC1DBS_16 CFGCHIP0_EDMA30TC1DBS(0x0) +#define CFGCHIP0_EDMA30TC1DBS_32 CFGCHIP0_EDMA30TC1DBS(0x1) +#define CFGCHIP0_EDMA30TC1DBS_64 CFGCHIP0_EDMA30TC1DBS(0x2) +#define CFGCHIP0_EDMA30TC0DBS(n) ((n) << 0) +#define CFGCHIP0_EDMA30TC0DBS_MASK CFGCHIP0_EDMA30TC0DBS(0x3) +#define CFGCHIP0_EDMA30TC0DBS_16 CFGCHIP0_EDMA30TC0DBS(0x0) +#define CFGCHIP0_EDMA30TC0DBS_32 CFGCHIP0_EDMA30TC0DBS(0x1) +#define CFGCHIP0_EDMA30TC0DBS_64 CFGCHIP0_EDMA30TC0DBS(0x2) + +/* CFGCHIP1 (eCAP/HPI/EDMA3_1/eHRPWM TBCLK/McASP0 AMUTEIN) register bits */ +#define CFGCHIP1_CAP2SRC(n) ((n) << 27) +#define CFGCHIP1_CAP2SRC_MASK CFGCHIP1_CAP2SRC(0x1f) +#define CFGCHIP1_CAP2SRC_ECAP_PIN CFGCHIP1_CAP2SRC(0x0) +#define CFGCHIP1_CAP2SRC_MCASP0_TX CFGCHIP1_CAP2SRC(0x1) +#define CFGCHIP1_CAP2SRC_MCASP0_RX CFGCHIP1_CAP2SRC(0x2) +#define CFGCHIP1_CAP2SRC_EMAC_C0_RX_THRESHOLD CFGCHIP1_CAP2SRC(0x7) +#define CFGCHIP1_CAP2SRC_EMAC_C0_RX CFGCHIP1_CAP2SRC(0x8) +#define CFGCHIP1_CAP2SRC_EMAC_C0_TX CFGCHIP1_CAP2SRC(0x9) +#define CFGCHIP1_CAP2SRC_EMAC_C0_MISC CFGCHIP1_CAP2SRC(0xa) +#define CFGCHIP1_CAP2SRC_EMAC_C1_RX_THRESHOLD CFGCHIP1_CAP2SRC(0xb) +#define CFGCHIP1_CAP2SRC_EMAC_C1_RX CFGCHIP1_CAP2SRC(0xc) +#define CFGCHIP1_CAP2SRC_EMAC_C1_TX CFGCHIP1_CAP2SRC(0xd) +#define CFGCHIP1_CAP2SRC_EMAC_C1_MISC CFGCHIP1_CAP2SRC(0xe) +#define CFGCHIP1_CAP2SRC_EMAC_C2_RX_THRESHOLD CFGCHIP1_CAP2SRC(0xf) +#define CFGCHIP1_CAP2SRC_EMAC_C2_RX CFGCHIP1_CAP2SRC(0x10) +#define CFGCHIP1_CAP2SRC_EMAC_C2_TX CFGCHIP1_CAP2SRC(0x11) +#define CFGCHIP1_CAP2SRC_EMAC_C2_MISC CFGCHIP1_CAP2SRC(0x12) +#define CFGCHIP1_CAP1SRC(n) ((n) << 22) +#define CFGCHIP1_CAP1SRC_MASK CFGCHIP1_CAP1SRC(0x1f) +#define CFGCHIP1_CAP1SRC_ECAP_PIN CFGCHIP1_CAP1SRC(0x0) +#define CFGCHIP1_CAP1SRC_MCASP0_TX CFGCHIP1_CAP1SRC(0x1) +#define CFGCHIP1_CAP1SRC_MCASP0_RX CFGCHIP1_CAP1SRC(0x2) +#define CFGCHIP1_CAP1SRC_EMAC_C0_RX_THRESHOLD CFGCHIP1_CAP1SRC(0x7) +#define CFGCHIP1_CAP1SRC_EMAC_C0_RX CFGCHIP1_CAP1SRC(0x8) +#define CFGCHIP1_CAP1SRC_EMAC_C0_TX CFGCHIP1_CAP1SRC(0x9) +#define CFGCHIP1_CAP1SRC_EMAC_C0_MISC CFGCHIP1_CAP1SRC(0xa) +#define CFGCHIP1_CAP1SRC_EMAC_C1_RX_THRESHOLD CFGCHIP1_CAP1SRC(0xb) +#define CFGCHIP1_CAP1SRC_EMAC_C1_RX CFGCHIP1_CAP1SRC(0xc) +#define CFGCHIP1_CAP1SRC_EMAC_C1_TX CFGCHIP1_CAP1SRC(0xd) +#define CFGCHIP1_CAP1SRC_EMAC_C1_MISC CFGCHIP1_CAP1SRC(0xe) +#define CFGCHIP1_CAP1SRC_EMAC_C2_RX_THRESHOLD CFGCHIP1_CAP1SRC(0xf) +#define CFGCHIP1_CAP1SRC_EMAC_C2_RX CFGCHIP1_CAP1SRC(0x10) +#define CFGCHIP1_CAP1SRC_EMAC_C2_TX CFGCHIP1_CAP1SRC(0x11) +#define CFGCHIP1_CAP1SRC_EMAC_C2_MISC CFGCHIP1_CAP1SRC(0x12) +#define CFGCHIP1_CAP0SRC(n) ((n) << 17) +#define CFGCHIP1_CAP0SRC_MASK CFGCHIP1_CAP0SRC(0x1f) +#define CFGCHIP1_CAP0SRC_ECAP_PIN CFGCHIP1_CAP0SRC(0x0) +#define CFGCHIP1_CAP0SRC_MCASP0_TX CFGCHIP1_CAP0SRC(0x1) +#define CFGCHIP1_CAP0SRC_MCASP0_RX CFGCHIP1_CAP0SRC(0x2) +#define CFGCHIP1_CAP0SRC_EMAC_C0_RX_THRESHOLD CFGCHIP1_CAP0SRC(0x7) +#define CFGCHIP1_CAP0SRC_EMAC_C0_RX CFGCHIP1_CAP0SRC(0x8) +#define CFGCHIP1_CAP0SRC_EMAC_C0_TX CFGCHIP1_CAP0SRC(0x9) +#define CFGCHIP1_CAP0SRC_EMAC_C0_MISC CFGCHIP1_CAP0SRC(0xa) +#define CFGCHIP1_CAP0SRC_EMAC_C1_RX_THRESHOLD CFGCHIP1_CAP0SRC(0xb) +#define CFGCHIP1_CAP0SRC_EMAC_C1_RX CFGCHIP1_CAP0SRC(0xc) +#define CFGCHIP1_CAP0SRC_EMAC_C1_TX CFGCHIP1_CAP0SRC(0xd) +#define CFGCHIP1_CAP0SRC_EMAC_C1_MISC CFGCHIP1_CAP0SRC(0xe) +#define CFGCHIP1_CAP0SRC_EMAC_C2_RX_THRESHOLD CFGCHIP1_CAP0SRC(0xf) +#define CFGCHIP1_CAP0SRC_EMAC_C2_RX CFGCHIP1_CAP0SRC(0x10) +#define CFGCHIP1_CAP0SRC_EMAC_C2_TX CFGCHIP1_CAP0SRC(0x11) +#define CFGCHIP1_CAP0SRC_EMAC_C2_MISC CFGCHIP1_CAP0SRC(0x12) +#define CFGCHIP1_HPIBYTEAD BIT(16) +#define CFGCHIP1_HPIENA BIT(15) +#define CFGCHIP0_EDMA31TC0DBS(n) ((n) << 13) +#define CFGCHIP0_EDMA31TC0DBS_MASK CFGCHIP0_EDMA31TC0DBS(0x3) +#define CFGCHIP0_EDMA31TC0DBS_16 CFGCHIP0_EDMA31TC0DBS(0x0) +#define CFGCHIP0_EDMA31TC0DBS_32 CFGCHIP0_EDMA31TC0DBS(0x1) +#define CFGCHIP0_EDMA31TC0DBS_64 CFGCHIP0_EDMA31TC0DBS(0x2) +#define CFGCHIP1_TBCLKSYNC BIT(12) +#define CFGCHIP1_AMUTESEL0(n) ((n) << 0) +#define CFGCHIP1_AMUTESEL0_MASK CFGCHIP1_AMUTESEL0(0xf) +#define CFGCHIP1_AMUTESEL0_LOW CFGCHIP1_AMUTESEL0(0x0) +#define CFGCHIP1_AMUTESEL0_BANK_0 CFGCHIP1_AMUTESEL0(0x1) +#define CFGCHIP1_AMUTESEL0_BANK_1 CFGCHIP1_AMUTESEL0(0x2) +#define CFGCHIP1_AMUTESEL0_BANK_2 CFGCHIP1_AMUTESEL0(0x3) +#define CFGCHIP1_AMUTESEL0_BANK_3 CFGCHIP1_AMUTESEL0(0x4) +#define CFGCHIP1_AMUTESEL0_BANK_4 CFGCHIP1_AMUTESEL0(0x5) +#define CFGCHIP1_AMUTESEL0_BANK_5 CFGCHIP1_AMUTESEL0(0x6) +#define CFGCHIP1_AMUTESEL0_BANK_6 CFGCHIP1_AMUTESEL0(0x7) +#define CFGCHIP1_AMUTESEL0_BANK_7 CFGCHIP1_AMUTESEL0(0x8) + +/* CFGCHIP2 (USB PHY) register bits */ +#define CFGCHIP2_PHYCLKGD BIT(17) +#define CFGCHIP2_VBUSSENSE BIT(16) +#define CFGCHIP2_RESET BIT(15) +#define CFGCHIP2_OTGMODE(n) ((n) << 13) +#define CFGCHIP2_OTGMODE_MASK CFGCHIP2_OTGMODE(0x3) +#define CFGCHIP2_OTGMODE_NO_OVERRIDE CFGCHIP2_OTGMODE(0x0) +#define CFGCHIP2_OTGMODE_FORCE_HOST CFGCHIP2_OTGMODE(0x1) +#define CFGCHIP2_OTGMODE_FORCE_DEVICE CFGCHIP2_OTGMODE(0x2) +#define CFGCHIP2_OTGMODE_FORCE_HOST_VBUS_LOW CFGCHIP2_OTGMODE(0x3) +#define CFGCHIP2_USB1PHYCLKMUX BIT(12) +#define CFGCHIP2_USB2PHYCLKMUX BIT(11) +#define CFGCHIP2_PHYPWRDN BIT(10) +#define CFGCHIP2_OTGPWRDN BIT(9) +#define CFGCHIP2_DATPOL BIT(8) +#define CFGCHIP2_USB1SUSPENDM BIT(7) +#define CFGCHIP2_PHY_PLLON BIT(6) +#define CFGCHIP2_SESENDEN BIT(5) +#define CFGCHIP2_VBDTCTEN BIT(4) +#define CFGCHIP2_REFFREQ(n) ((n) << 0) +#define CFGCHIP2_REFFREQ_MASK CFGCHIP2_REFFREQ(0xf) +#define CFGCHIP2_REFFREQ_12MHZ CFGCHIP2_REFFREQ(0x1) +#define CFGCHIP2_REFFREQ_24MHZ CFGCHIP2_REFFREQ(0x2) +#define CFGCHIP2_REFFREQ_48MHZ CFGCHIP2_REFFREQ(0x3) +#define CFGCHIP2_REFFREQ_19_2MHZ CFGCHIP2_REFFREQ(0x4) +#define CFGCHIP2_REFFREQ_38_4MHZ CFGCHIP2_REFFREQ(0x5) +#define CFGCHIP2_REFFREQ_13MHZ CFGCHIP2_REFFREQ(0x6) +#define CFGCHIP2_REFFREQ_26MHZ CFGCHIP2_REFFREQ(0x7) +#define CFGCHIP2_REFFREQ_20MHZ CFGCHIP2_REFFREQ(0x8) +#define CFGCHIP2_REFFREQ_40MHZ CFGCHIP2_REFFREQ(0x9) + +/* CFGCHIP3 (EMAC/uPP/PLL1/ASYNC3/PRU/DIV4.5/EMIFA) register bits */ +#define CFGCHIP3_RMII_SEL BIT(8) +#define CFGCHIP3_UPP_TX_CLKSRC BIT(6) +#define CFGCHIP3_PLL1_MASTER_LOCK BIT(5) +#define CFGCHIP3_ASYNC3_CLKSRC BIT(4) +#define CFGCHIP3_PRUEVTSEL BIT(3) +#define CFGCHIP3_DIV45PENA BIT(2) +#define CFGCHIP3_EMA_CLKSRC BIT(1) + +/* CFGCHIP4 (McASP0 AMUNTEIN) register bits */ +#define CFGCHIP4_AMUTECLR0 BIT(0) + +#endif /* __LINUX_MFD_DA8XX_CFGCHIP_H */ -- cgit v0.10.2 From bf8ca651e1f8f054b39b6b3b95d6f515c3c857d5 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 10 Aug 2016 18:04:44 +0800 Subject: phy: brcm-sata: Return proper error if brcm_sata_phy_init fails Return proper error instead of 0 if brcm_sata_phy_init fails. Signed-off-by: Axel Lin Signed-off-by: Kishon Vijay Abraham I diff --git a/drivers/phy/phy-brcm-sata.c b/drivers/phy/phy-brcm-sata.c index 18d6626..8ffc44a 100644 --- a/drivers/phy/phy-brcm-sata.c +++ b/drivers/phy/phy-brcm-sata.c @@ -367,7 +367,7 @@ static int brcm_sata_phy_init(struct phy *phy) rc = -ENODEV; }; - return 0; + return rc; } static const struct phy_ops phy_ops = { -- cgit v0.10.2 From 017300da3a4547d85e52c2484fc0bd759e1bbcdb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 6 Jul 2016 08:00:06 +0200 Subject: phy: sun9i-usb: fix error handling This is likely that checking 'phy->hsic_clk' instead of 'phy->clk' is expected here. Signed-off-by: Christophe JAILLET Acked-by: Chen-Yu Tsai diff --git a/drivers/phy/phy-sun9i-usb.c b/drivers/phy/phy-sun9i-usb.c index ac4f31a..28fce4b 100644 --- a/drivers/phy/phy-sun9i-usb.c +++ b/drivers/phy/phy-sun9i-usb.c @@ -141,9 +141,9 @@ static int sun9i_usb_phy_probe(struct platform_device *pdev) } phy->hsic_clk = devm_clk_get(dev, "hsic_12M"); - if (IS_ERR(phy->clk)) { + if (IS_ERR(phy->hsic_clk)) { dev_err(dev, "failed to get hsic_12M clock\n"); - return PTR_ERR(phy->clk); + return PTR_ERR(phy->hsic_clk); } phy->reset = devm_reset_control_get(dev, "hsic"); -- cgit v0.10.2 From 979cf59acc9d634cc140aadd0d2915947ab303cc Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 12 Aug 2016 11:45:18 +0000 Subject: ASoC: Intel: Skylake: Fix error return code in skl_probe() Fix to return error code -ENODEV from the error handling case instead of 0, as done elsewhere in this function. Fixes: 87b2bdf02278 ("ASoC: Intel: Skylake: Initialize NHLT table") Signed-off-by: Wei Yongjun Acked-By: Vinod Koul Signed-off-by: Mark Brown diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c index cd59536..e3e7641 100644 --- a/sound/soc/intel/skylake/skl.c +++ b/sound/soc/intel/skylake/skl.c @@ -672,8 +672,10 @@ static int skl_probe(struct pci_dev *pci, skl->nhlt = skl_nhlt_init(bus->dev); - if (skl->nhlt == NULL) + if (skl->nhlt == NULL) { + err = -ENODEV; goto out_free; + } skl_nhlt_update_topology_bin(skl); -- cgit v0.10.2 From 4a008c002133aba0276cbc5d116bbb774aaf1b0d Mon Sep 17 00:00:00 2001 From: Thilo Cestonaro Date: Fri, 12 Aug 2016 14:39:10 +0200 Subject: hwmon: (ftsteutates) Correct ftp urls in driver documentation Signed-off-by: Thilo Cestonaro Signed-off-by: Guenter Roeck diff --git a/Documentation/hwmon/ftsteutates b/Documentation/hwmon/ftsteutates index 2a1bf69..8c10a91 100644 --- a/Documentation/hwmon/ftsteutates +++ b/Documentation/hwmon/ftsteutates @@ -19,5 +19,5 @@ enhancements. It can monitor up to 4 voltages, 16 temperatures and implemented in this driver. Specification of the chip can be found here: -ftp:///pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/BMC-Teutates_Specification_V1.21.pdf -ftp:///pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/Fujitsu_mainboards-1-Sensors_HowTo-en-US.pdf +ftp://ftp.ts.fujitsu.com/pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/BMC-Teutates_Specification_V1.21.pdf +ftp://ftp.ts.fujitsu.com/pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/Fujitsu_mainboards-1-Sensors_HowTo-en-US.pdf -- cgit v0.10.2 From 4608d96fb491125657fd8183a35921e4d4e27bc8 Mon Sep 17 00:00:00 2001 From: Xing Zheng Date: Tue, 2 Aug 2016 15:19:57 +0800 Subject: clk: rockchip: fix incorrect GATE bits for {c, g}pll_aclk_perihp_src on rk3399 Sorry to refer incorrect clock diagram, we double check it that the bits configuration of the Xpll_aclk_perihp_src need to be fixed: bit 1 - shows aclk_perihp_cpll_src_en bit 0 - shows aclk_perihp_gpll_src_en Through the testing that plug/unplug the USB ethernet cable on the RK3399 kevin board. 1. the hclk_host0 and hclk_host1 are endpoint clocks: cpll --> G5[1] --> aclk_perihp_cpll_src --\ |--> hclk_host0 | --> ... ---> | gpll --> G5[0] --> aclk_perihp_gpll_src --/ |--> hclk_host1 2. there is no clock below the cpll_aclk_perihp_src, and the hclk_hostX are below the gpll_aclk_perihp_src: pll_cpll 1 1 800000000 0 0 cpll 7 19 800000000 0 0 cpll_aclk_perihp_src 0 0 800000000 0 0 ... pll_gpll 1 1 594000000 0 0 gpll 10 10 594000000 0 0 gpll_aclk_perihp_src 2 2 594000000 0 0 hclk_perihp 5 5 74250000 0 0 hclk_host1_arb 2 2 74250000 0 0 hclk_host1 2 2 74250000 0 0 hclk_host0_arb 2 2 74250000 0 0 hclk_host0 2 2 74250000 0 0 3. by default, G5[0] and G5[1] are enabled: localhost ~ # mem r 0xff760314 0x000003e0 4. close the G5[1] (aclk_perihp_cpll_src), and plug/unplug USB ethernet cable, the DUT still works well: localhost ~ # mem w 0xff760314 0xffff03e2 localhost ~ # mem r 0xff760314 0x000003e2 plug/unplug, the work statue is ok 5. close the G5[0] (aclk_perihp_gpll_src), , and plug/unplug USB ethernet cable, the DUT will be crashed: localhost ~ # mem w 0xff760314 0xffff03e1 localhost ~ # mem r 0xff760314 0x000003e1 plug/unplug, the DUT is crashed Summary: bit 1 - shows aclk_perihp_cpll_src_en bit 0 - shows aclk_perihp_gpll_src_en Fixes: 3bd14ae9da91 ("clk: rockchip: fix incorrect parent for rk3399's {c,g}pll_aclk_perihp_src") Signed-off-by: Xing Zheng [here the clock-documentation in the manual was actually stating the wrong bits and thus only Xing's testing above revealed the issue] Signed-off-by: Heiko Stuebner diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c index 01fa60e..ec5b2fd 100644 --- a/drivers/clk/rockchip/clk-rk3399.c +++ b/drivers/clk/rockchip/clk-rk3399.c @@ -833,9 +833,9 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = { /* perihp */ GATE(0, "cpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED, - RK3399_CLKGATE_CON(5), 0, GFLAGS), - GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(5), 1, GFLAGS), + GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED, + RK3399_CLKGATE_CON(5), 0, GFLAGS), COMPOSITE(ACLK_PERIHP, "aclk_perihp", mux_aclk_perihp_p, CLK_IGNORE_UNUSED, RK3399_CLKSEL_CON(14), 7, 1, MFLAGS, 0, 5, DFLAGS, RK3399_CLKGATE_CON(5), 2, GFLAGS), -- cgit v0.10.2 From fc520f8b4fa35ceb36f0ad031c1a297968788236 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Fri, 12 Aug 2016 13:49:34 +0800 Subject: of/platform: disable the of_platform_default_populate_init() for all the ppc boards With the commit 44a7185c2ae6 ("of/platform: Add common method to populate default bus"), a default function is introduced to populate the default bus and this function is invoked at the arch_initcall_sync level. But a lot of ppc boards use machine_device_initcall() to populate the default bus. This means that the default populate function has higher priority and would override the arch specific population of the bus. The side effect is that some arch specific bus are not probed, then cause various malfunction due to the miss of some devices. Since it is very possible to introduce bugs if we simply change the initcall level for all these boards(about 30+). This just disable this default function for all the ppc boards. Signed-off-by: Kevin Hao Signed-off-by: Rob Herring diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 8aa1976..f39ccd5 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -497,6 +497,7 @@ int of_platform_default_populate(struct device_node *root, } EXPORT_SYMBOL_GPL(of_platform_default_populate); +#ifndef CONFIG_PPC static int __init of_platform_default_populate_init(void) { struct device_node *node; @@ -521,6 +522,7 @@ static int __init of_platform_default_populate_init(void) return 0; } arch_initcall_sync(of_platform_default_populate_init); +#endif static int of_platform_device_destroy(struct device *dev, void *data) { -- cgit v0.10.2 From a7a0729c4532c94f4e8efb4faaf6ef00e5fe19ba Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 12 Aug 2016 14:11:12 -0600 Subject: docs: Set the Sphinx default highlight language to "guess" This should eliminate a whole class of markup warnings, at the cost of occasionally amusing markup choices; we'll have to see if it works out. Suggested-by: Markus Heiser Signed-off-by: Jonathan Corbet diff --git a/Documentation/conf.py b/Documentation/conf.py index 96b7aa6..106ae9c 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -131,7 +131,7 @@ pygments_style = 'sphinx' todo_include_todos = False primary_domain = 'C' -highlight_language = 'C' +highlight_language = 'guess' # -- Options for HTML output ---------------------------------------------- -- cgit v0.10.2 From ce7c6c9e1d997a2670aead3a7b87f4df32c11118 Mon Sep 17 00:00:00 2001 From: Greg Edwards Date: Sat, 30 Jul 2016 10:06:26 -0600 Subject: mpt3sas: Fix resume on WarpDrive flash cards mpt3sas crashes on resume after suspend with WarpDrive flash cards. The reply_post_host_index array is not set back up after the resume, and we deference a stale pointer in _base_interrupt(). [ 47.309711] BUG: unable to handle kernel paging request at ffffc90001f8006c [ 47.318289] IP: [] _base_interrupt+0x49f/0xa30 [mpt3sas] [ 47.326749] PGD 41ccaa067 PUD 41ccab067 PMD 3466c067 PTE 0 [ 47.333848] Oops: 0002 [#1] SMP ... [ 47.452708] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.7.0 #6 [ 47.460506] Hardware name: Dell Inc. OptiPlex 990/06D7TR, BIOS A18 09/24/2013 [ 47.469629] task: ffffffff81c0d500 ti: ffffffff81c00000 task.ti: ffffffff81c00000 [ 47.479112] RIP: 0010:[] [] _base_interrupt+0x49f/0xa30 [mpt3sas] [ 47.490466] RSP: 0018:ffff88041d203e30 EFLAGS: 00010002 [ 47.497801] RAX: 0000000000000001 RBX: ffff880033f4c000 RCX: 0000000000000001 [ 47.506973] RDX: ffffc90001f8006c RSI: 0000000000000082 RDI: 0000000000000082 [ 47.516141] RBP: ffff88041d203eb0 R08: ffff8804118e2820 R09: 0000000000000001 [ 47.525300] R10: 0000000000000001 R11: 00000000100c0000 R12: 0000000000000000 [ 47.534457] R13: ffff880412c487e0 R14: ffff88041a8987d8 R15: 0000000000000001 [ 47.543632] FS: 0000000000000000(0000) GS:ffff88041d200000(0000) knlGS:0000000000000000 [ 47.553796] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 47.561632] CR2: ffffc90001f8006c CR3: 0000000001c06000 CR4: 00000000000406f0 [ 47.570883] Stack: [ 47.575015] 000000001d211228 ffff88041d2100c0 ffff8800c47d8130 0000000000000100 [ 47.584625] ffff8804100c0000 100c000000000000 ffff88041a8992a0 ffff88041a8987f8 [ 47.594230] ffff88041d203e00 ffffffff81111e55 000000000000038c ffff880414ad4280 [ 47.603862] Call Trace: [ 47.608474] [ 47.610413] [] ? call_timer_fn+0x35/0x120 [ 47.620539] [] handle_irq_event_percpu+0x7f/0x1c0 [ 47.629061] [] handle_irq_event+0x2c/0x50 [ 47.636859] [] handle_edge_irq+0x6f/0x130 [ 47.644654] [] handle_irq+0x73/0x120 [ 47.652011] [] ? atomic_notifier_call_chain+0x1a/0x20 [ 47.660854] [] do_IRQ+0x4b/0xd0 [ 47.667777] [] common_interrupt+0x8c/0x8c [ 47.675635] Move the reply_post_host_index array setup into mpt3sas_base_map_resources(), which is also in the resume path. Cc: stable@vger.kernel.org Signed-off-by: Greg Edwards Acked-by: Chaitra P B Signed-off-by: Martin K. Petersen diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 751f13e..750f82c 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2188,6 +2188,17 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc) } else ioc->msix96_vector = 0; + if (ioc->is_warpdrive) { + ioc->reply_post_host_index[0] = (resource_size_t __iomem *) + &ioc->chip->ReplyPostHostIndex; + + for (i = 1; i < ioc->cpu_msix_table_sz; i++) + ioc->reply_post_host_index[i] = + (resource_size_t __iomem *) + ((u8 __iomem *)&ioc->chip->Doorbell + (0x4000 + ((i - 1) + * 4))); + } + list_for_each_entry(reply_q, &ioc->reply_queue_list, list) pr_info(MPT3SAS_FMT "%s: IRQ %d\n", reply_q->name, ((ioc->msix_enable) ? "PCI-MSI-X enabled" : @@ -5280,17 +5291,6 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) if (r) goto out_free_resources; - if (ioc->is_warpdrive) { - ioc->reply_post_host_index[0] = (resource_size_t __iomem *) - &ioc->chip->ReplyPostHostIndex; - - for (i = 1; i < ioc->cpu_msix_table_sz; i++) - ioc->reply_post_host_index[i] = - (resource_size_t __iomem *) - ((u8 __iomem *)&ioc->chip->Doorbell + (0x4000 + ((i - 1) - * 4))); - } - pci_set_drvdata(ioc->pdev, ioc->shost); r = _base_get_ioc_facts(ioc, CAN_SLEEP); if (r) -- cgit v0.10.2 From 5381cfb6f0422da24cfa9da35b0433c0415830e0 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Fri, 12 Aug 2016 09:10:27 -0400 Subject: power: supply: max17042_battery: fix model download bug. The device's model download function returns the model data as an array of u32s, which is later compared to the reference model data. However, since the latter is an array of u16s, the comparison does not happen correctly, and model verification fails. This in turn breaks the POR initialization sequence. Fixes: 39e7213edc4f3 ("max17042_battery: Support regmap to access device's registers") Reported-by: Dan Carpenter Signed-off-by: Sven Van Asbroeck Reviewed-by: Krzysztof Kozlowski Signed-off-by: Sebastian Reichel diff --git a/drivers/power/max17042_battery.c b/drivers/power/max17042_battery.c index 9c65f13..da7a75f 100644 --- a/drivers/power/max17042_battery.c +++ b/drivers/power/max17042_battery.c @@ -457,13 +457,16 @@ static inline void max17042_write_model_data(struct max17042_chip *chip, } static inline void max17042_read_model_data(struct max17042_chip *chip, - u8 addr, u32 *data, int size) + u8 addr, u16 *data, int size) { struct regmap *map = chip->regmap; int i; + u32 tmp; - for (i = 0; i < size; i++) - regmap_read(map, addr + i, &data[i]); + for (i = 0; i < size; i++) { + regmap_read(map, addr + i, &tmp); + data[i] = (u16)tmp; + } } static inline int max17042_model_data_compare(struct max17042_chip *chip, @@ -486,7 +489,7 @@ static int max17042_init_model(struct max17042_chip *chip) { int ret; int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl); - u32 *temp_data; + u16 *temp_data; temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL); if (!temp_data) @@ -501,7 +504,7 @@ static int max17042_init_model(struct max17042_chip *chip) ret = max17042_model_data_compare( chip, chip->pdata->config_data->cell_char_tbl, - (u16 *)temp_data, + temp_data, table_size); max10742_lock_model(chip); @@ -514,7 +517,7 @@ static int max17042_verify_model_lock(struct max17042_chip *chip) { int i; int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl); - u32 *temp_data; + u16 *temp_data; int ret = 0; temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL); -- cgit v0.10.2 From 7a4947cf6f26b7d0a5db260d212f6df41a563d23 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Fri, 12 Aug 2016 18:01:50 +0800 Subject: power: reset: reboot-mode: fix build error of missing ioremap/iounmap on UM commit 4fcd504edbf7 ("power: reset: add reboot mode driver") uses api from syscon, and syscon uses ioremap/iounmap which depends on HAS_IOMEM, so let's depend on MFD_SYSCON instead of selecting it directly to avoid the um-allyesconfig like build error on archs that without iomem: drivers/mfd/syscon.c: In function 'of_syscon_register': drivers/mfd/syscon.c:67:9: error: implicit declaration of function 'ioremap' [-Werror=implicit-function-declaration] base = ioremap(res.start, resource_size(&res)); ^ drivers/mfd/syscon.c:67:7: warning: assignment makes pointer from integer without a cast [-Wint-conversion] base = ioremap(res.start, resource_size(&res)); ^ drivers/mfd/syscon.c:109:2: error: implicit declaration of function 'iounmap' [-Werror=implicit-function-declaration] iounmap(base); ^ Reported-by: Kbuild test robot Fixes: 4fcd504edbf7("power: reset: add reboot mode driver") Signed-off-by: Andy Yan Reviewed-by: Krzysztof Kozlowski Signed-off-by: Sebastian Reichel diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig index 3bfac53..c74c3f6 100644 --- a/drivers/power/reset/Kconfig +++ b/drivers/power/reset/Kconfig @@ -200,8 +200,8 @@ config REBOOT_MODE config SYSCON_REBOOT_MODE tristate "Generic SYSCON regmap reboot mode driver" depends on OF + depends on MFD_SYSCON select REBOOT_MODE - select MFD_SYSCON help Say y here will enable reboot mode driver. This will get reboot mode arguments and store it in SYSCON mapped -- cgit v0.10.2 From bae170efd6c42bf116f513a1dd07639d68fa71b9 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Fri, 12 Aug 2016 20:49:18 +0530 Subject: power: reset: hisi-reboot: Unmap region obtained by of_iomap Free memory mapping, if probe is not successful. Fixes: 4a9b37371822 ("power: reset: move hisilicon reboot code") Signed-off-by: Arvind Yadav Signed-off-by: Sebastian Reichel diff --git a/drivers/power/reset/hisi-reboot.c b/drivers/power/reset/hisi-reboot.c index 9ab7f56..f69387e 100644 --- a/drivers/power/reset/hisi-reboot.c +++ b/drivers/power/reset/hisi-reboot.c @@ -53,13 +53,16 @@ static int hisi_reboot_probe(struct platform_device *pdev) if (of_property_read_u32(np, "reboot-offset", &reboot_offset) < 0) { pr_err("failed to find reboot-offset property\n"); + iounmap(base); return -EINVAL; } err = register_restart_handler(&hisi_restart_nb); - if (err) + if (err) { dev_err(&pdev->dev, "cannot register restart handler (err=%d)\n", err); + iounmap(base); + } return err; } -- cgit v0.10.2 From c2d5be14cb39da34b076b461d8aa2344611364c6 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 12 Aug 2016 15:12:36 -0600 Subject: docs: kernel-documentation: remove some highlight directives With the conf.py change, we don't need them to avoid warnings anymore. Signed-off-by: Jonathan Corbet diff --git a/Documentation/kernel-documentation.rst b/Documentation/kernel-documentation.rst index c4eb504..391decc 100644 --- a/Documentation/kernel-documentation.rst +++ b/Documentation/kernel-documentation.rst @@ -366,8 +366,6 @@ Domain`_ references. Cross-referencing from reStructuredText ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. highlight:: none - To cross-reference the functions and types defined in the kernel-doc comments from reStructuredText documents, please use the `Sphinx C Domain`_ references. For example:: @@ -390,8 +388,6 @@ For further details, please refer to the `Sphinx C Domain`_ documentation. Function documentation ---------------------- -.. highlight:: c - The general format of a function and function-like macro kernel-doc comment is:: /** @@ -572,8 +568,6 @@ DocBook XML [DEPRECATED] Converting DocBook to Sphinx ---------------------------- -.. highlight:: none - Over time, we expect all of the documents under ``Documentation/DocBook`` to be converted to Sphinx and reStructuredText. For most DocBook XML documents, a good enough solution is to use the simple ``Documentation/sphinx/tmplcvt`` script, -- cgit v0.10.2 From e120dcb6b2a11840b08f0fc0d2e90256b7b6e842 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Fri, 13 May 2016 13:28:21 -0700 Subject: ses: Fix racy cleanup of /sys in remove_dev() Currently we free the resources backing the enclosure device before we call device_unregister(). This is racy: during rmmod of low-level SCSI drivers that hook into enclosure, we end up with a small window of time during which writing to /sys can OOPS. Example trace with mpt3sas: general protection fault: 0000 [#1] SMP KASAN Modules linked in: mpt3sas(-) <...> RIP: [] ses_get_page2_descriptor.isra.6+0x38/0x220 [ses] Call Trace: [] ses_set_fault+0xf4/0x400 [ses] [] set_component_fault+0xa9/0xf0 [enclosure] [] dev_attr_store+0x3c/0x70 [] sysfs_kf_write+0x115/0x180 [] kernfs_fop_write+0x275/0x3a0 [] __vfs_write+0xe0/0x3e0 [] vfs_write+0x13f/0x4a0 [] SyS_write+0x111/0x230 [] entry_SYSCALL_64_fastpath+0x13/0x94 Fortunately the solution is extremely simple: call device_unregister() before we free the resources, and the race no longer exists. The driver core holds a reference over ->remove_dev(), so AFAICT this is safe. Signed-off-by: Calvin Owens Reviewed-by: James Bottomley Signed-off-by: Martin K. Petersen diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index 53ef1cb..0e8601a 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -778,6 +778,8 @@ static void ses_intf_remove_enclosure(struct scsi_device *sdev) if (!edev) return; + enclosure_unregister(edev); + ses_dev = edev->scratch; edev->scratch = NULL; @@ -789,7 +791,6 @@ static void ses_intf_remove_enclosure(struct scsi_device *sdev) kfree(edev->component[0].scratch); put_device(&edev->edev); - enclosure_unregister(edev); } static void ses_intf_remove(struct device *cdev, -- cgit v0.10.2 From 601bbae0bc10d4306857b93d84240b039b3d9a6c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Aug 2016 23:54:08 +0200 Subject: dsa: mv88e6xxx: hide unused functions When CONFIG_NET_DSA_HWMON is disabled, we get warnings about two unused functions whose only callers are all inside of an #ifdef: drivers/net/dsa/mv88e6xxx.c:3257:12: 'mv88e6xxx_mdio_page_write' defined but not used [-Werror=unused-function] drivers/net/dsa/mv88e6xxx.c:3244:12: 'mv88e6xxx_mdio_page_read' defined but not used [-Werror=unused-function] This adds another ifdef around the function definitions. The warnings appeared after the functions were marked 'static', but the problem was already there before that. Signed-off-by: Arnd Bergmann Fixes: 57d3231057e9 ("net: dsa: mv88e6xxx: fix style issues") Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d36aedd..d1d9d3c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3187,6 +3187,7 @@ static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr) return err; } +#ifdef CONFIG_NET_DSA_HWMON static int mv88e6xxx_mdio_page_read(struct dsa_switch *ds, int port, int page, int reg) { @@ -3212,6 +3213,7 @@ static int mv88e6xxx_mdio_page_write(struct dsa_switch *ds, int port, int page, return ret; } +#endif static int mv88e6xxx_port_to_mdio_addr(struct mv88e6xxx_chip *chip, int port) { -- cgit v0.10.2 From 947d2c2cd3340d11a1737d3f4781170cc3a4b74f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 13 Aug 2016 02:54:04 +0200 Subject: PM / sleep: Update some system sleep documentation Update some documentation related to system sleep to document new features and remove outdated information from it. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Reviewed-by: Chen Yu diff --git a/Documentation/power/basic-pm-debugging.txt b/Documentation/power/basic-pm-debugging.txt index b96098c..708f87f 100644 --- a/Documentation/power/basic-pm-debugging.txt +++ b/Documentation/power/basic-pm-debugging.txt @@ -164,7 +164,32 @@ load n/2 modules more and try again. Again, if you find the offending module(s), it(they) must be unloaded every time before hibernation, and please report the problem with it(them). -c) Advanced debugging +c) Using the "test_resume" hibernation option + +/sys/power/disk generally tells the kernel what to do after creating a +hibernation image. One of the available options is "test_resume" which +causes the just created image to be used for immediate restoration. Namely, +after doing: + +# echo test_resume > /sys/power/disk +# echo disk > /sys/power/state + +a hibernation image will be created and a resume from it will be triggered +immediately without involving the platform firmware in any way. + +That test can be used to check if failures to resume from hibernation are +related to bad interactions with the platform firmware. That is, if the above +works every time, but resume from actual hibernation does not work or is +unreliable, the platform firmware may be responsible for the failures. + +On architectures and platforms that support using different kernels to restore +hibernation images (that is, the kernel used to read the image from storage and +load it into memory is different from the one included in the image) or support +kernel address space randomization, it also can be used to check if failures +to resume may be related to the differences between the restore and image +kernels. + +d) Advanced debugging In case that hibernation does not work on your system even in the minimal configuration and compiling more drivers as modules is not practical or some diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt index f1f0f59a..974916f 100644 --- a/Documentation/power/interface.txt +++ b/Documentation/power/interface.txt @@ -1,75 +1,76 @@ -Power Management Interface - - -The power management subsystem provides a unified sysfs interface to -userspace, regardless of what architecture or platform one is -running. The interface exists in /sys/power/ directory (assuming sysfs -is mounted at /sys). - -/sys/power/state controls system power state. Reading from this file -returns what states are supported, which is hard-coded to 'freeze', -'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and 'disk' -(Suspend-to-Disk). - -Writing to this file one of those strings causes the system to -transition into that state. Please see the file -Documentation/power/states.txt for a description of each of those -states. - - -/sys/power/disk controls the operating mode of the suspend-to-disk -mechanism. Suspend-to-disk can be handled in several ways. We have a -few options for putting the system to sleep - using the platform driver -(e.g. ACPI or other suspend_ops), powering off the system or rebooting the -system (for testing). - -Additionally, /sys/power/disk can be used to turn on one of the two testing -modes of the suspend-to-disk mechanism: 'testproc' or 'test'. If the -suspend-to-disk mechanism is in the 'testproc' mode, writing 'disk' to -/sys/power/state will cause the kernel to disable nonboot CPUs and freeze -tasks, wait for 5 seconds, unfreeze tasks and enable nonboot CPUs. If it is -in the 'test' mode, writing 'disk' to /sys/power/state will cause the kernel -to disable nonboot CPUs and freeze tasks, shrink memory, suspend devices, wait -for 5 seconds, resume devices, unfreeze tasks and enable nonboot CPUs. Then, -we are able to look in the log messages and work out, for example, which code -is being slow and which device drivers are misbehaving. - -Reading from this file will display all supported modes and the currently -selected one in brackets, for example - - [shutdown] reboot test testproc - -Writing to this file will accept one of - - 'platform' (only if the platform supports it) - 'shutdown' - 'reboot' - 'testproc' - 'test' - -/sys/power/image_size controls the size of the image created by -the suspend-to-disk mechanism. It can be written a string -representing a non-negative integer that will be used as an upper -limit of the image size, in bytes. The suspend-to-disk mechanism will -do its best to ensure the image size will not exceed that number. However, -if this turns out to be impossible, it will try to suspend anyway using the -smallest image possible. In particular, if "0" is written to this file, the -suspend image will be as small as possible. - -Reading from this file will display the current image size limit, which -is set to 2/5 of available RAM by default. - -/sys/power/pm_trace controls the code which saves the last PM event point in -the RTC across reboots, so that you can debug a machine that just hangs -during suspend (or more commonly, during resume). Namely, the RTC is only -used to save the last PM event point if this file contains '1'. Initially it -contains '0' which may be changed to '1' by writing a string representing a -nonzero integer into it. - -To use this debugging feature you should attempt to suspend the machine, then -reboot it and run - - dmesg -s 1000000 | grep 'hash matches' - -CAUTION: Using it will cause your machine's real-time (CMOS) clock to be -set to a random invalid time after a resume. +Power Management Interface for System Sleep + +Copyright (c) 2016 Intel Corp., Rafael J. Wysocki + +The power management subsystem provides userspace with a unified sysfs interface +for system sleep regardless of the underlying system architecture or platform. +The interface is located in the /sys/power/ directory (assuming that sysfs is +mounted at /sys). + +/sys/power/state is the system sleep state control file. + +Reading from it returns a list of supported sleep states, encoded as: + +'freeze' (Suspend-to-Idle) +'standby' (Power-On Suspend) +'mem' (Suspend-to-RAM) +'disk' (Suspend-to-Disk) + +Suspend-to-Idle is always supported. Suspend-to-Disk is always supported +too as long the kernel has been configured to support hibernation at all +(ie. CONFIG_HIBERNATION is set in the kernel configuration file). Support +for Suspend-to-RAM and Power-On Suspend depends on the capabilities of the +platform. + +If one of the strings listed in /sys/power/state is written to it, the system +will attempt to transition into the corresponding sleep state. Refer to +Documentation/power/states.txt for a description of each of those states. + +/sys/power/disk controls the operating mode of hibernation (Suspend-to-Disk). +Specifically, it tells the kernel what to do after creating a hibernation image. + +Reading from it returns a list of supported options encoded as: + +'platform' (put the system into sleep using a platform-provided method) +'shutdown' (shut the system down) +'reboot' (reboot the system) +'suspend' (trigger a Suspend-to-RAM transition) +'test_resume' (resume-after-hibernation test mode) + +The currently selected option is printed in square brackets. + +The 'platform' option is only available if the platform provides a special +mechanism to put the system to sleep after creating a hibernation image (ACPI +does that, for example). The 'suspend' option is available if Suspend-to-RAM +is supported. Refer to Documentation/power/basic_pm_debugging.txt for the +description of the 'test_resume' option. + +To select an option, write the string representing it to /sys/power/disk. + +/sys/power/image_size controls the size of hibernation images. + +It can be written a string representing a non-negative integer that will be +used as a best-effort upper limit of the image size, in bytes. The hibernation +core will do its best to ensure that the image size will not exceed that number. +However, if that turns out to be impossible to achieve, a hibernation image will +still be created and its size will be as small as possible. In particular, +writing '0' to this file will enforce hibernation images to be as small as +possible. + +Reading from this file returns the current image size limit, which is set to +around 2/5 of available RAM by default. + +/sys/power/pm_trace controls the PM trace mechanism saving the last suspend +or resume event point in the RTC across reboots. + +It helps to debug hard lockups or reboots due to device driver failures that +occur during system suspend or resume (which is more common) more effectively. + +If /sys/power/pm_trace contains '1', the fingerprint of each suspend/resume +event point in turn will be stored in the RTC memory (overwriting the actual +RTC information), so it will survive a system crash if one occurs right after +storing it and it can be used later to identify the driver that caused the crash +to happen (see Documentation/power/s2ram.txt for more information). + +Initially it contains '0' which may be changed to '1' by writing a string +representing a nonzero integer into it. -- cgit v0.10.2 From 747ea55e4f78fd980350c39570a986b8c1c3e4aa Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 12 Aug 2016 22:17:17 +0200 Subject: bpf: fix bpf_skb_in_cgroup helper naming While hashing out BPF's current_task_under_cgroup helper bits, it came to discussion that the skb_in_cgroup helper name was suboptimally chosen. Tejun says: So, I think in_cgroup should mean that the object is in that particular cgroup while under_cgroup in the subhierarchy of that cgroup. Let's rename the other subhierarchy test to under too. I think that'd be a lot less confusing going forward. [...] It's more intuitive and gives us the room to implement the real "in" test if ever necessary in the future. Since this touches uapi bits, we need to change this as long as v4.8 is not yet officially released. Thus, change the helper enum and rename related bits. Fixes: 4a482f34afcc ("cgroup: bpf: Add bpf_skb_in_cgroup_proto") Reference: http://patchwork.ozlabs.org/patch/658500/ Suggested-by: Sargun Dhillon Suggested-by: Tejun Heo Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index da218fe..9e5fc16 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -339,7 +339,7 @@ enum bpf_func_id { BPF_FUNC_skb_change_type, /** - * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb + * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb * @skb: pointer to skb * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type * @index: index of the cgroup in the bpf_map @@ -348,7 +348,7 @@ enum bpf_func_id { * == 1 skb succeeded the cgroup2 descendant test * < 0 error */ - BPF_FUNC_skb_in_cgroup, + BPF_FUNC_skb_under_cgroup, /** * bpf_get_hash_recalc(skb) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7094c69..daea765 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1053,7 +1053,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) goto error; break; case BPF_MAP_TYPE_CGROUP_ARRAY: - if (func_id != BPF_FUNC_skb_in_cgroup) + if (func_id != BPF_FUNC_skb_under_cgroup) goto error; break; default: @@ -1075,7 +1075,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) goto error; break; - case BPF_FUNC_skb_in_cgroup: + case BPF_FUNC_skb_under_cgroup: if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) goto error; break; diff --git a/net/core/filter.c b/net/core/filter.c index b5add4e..bd9bf2e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2317,7 +2317,7 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) } #ifdef CONFIG_SOCK_CGROUP_DATA -static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *)(long)r1; struct bpf_map *map = (struct bpf_map *)(long)r2; @@ -2340,8 +2340,8 @@ static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp); } -static const struct bpf_func_proto bpf_skb_in_cgroup_proto = { - .func = bpf_skb_in_cgroup, +static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { + .func = bpf_skb_under_cgroup, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, @@ -2421,8 +2421,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; #ifdef CONFIG_SOCK_CGROUP_DATA - case BPF_FUNC_skb_in_cgroup: - return &bpf_skb_in_cgroup_proto; + case BPF_FUNC_skb_under_cgroup: + return &bpf_skb_under_cgroup_proto; #endif default: return sk_filter_func_proto(func_id); diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 217c8d5..7927a09 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -72,8 +72,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag (void *) BPF_FUNC_l3_csum_replace; static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = (void *) BPF_FUNC_l4_csum_replace; -static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) = - (void *) BPF_FUNC_skb_in_cgroup; +static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = + (void *) BPF_FUNC_skb_under_cgroup; #if defined(__x86_64__) diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c index 2732c37..10ff734 100644 --- a/samples/bpf/test_cgrp2_tc_kern.c +++ b/samples/bpf/test_cgrp2_tc_kern.c @@ -57,7 +57,7 @@ int handle_egress(struct __sk_buff *skb) bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg), eth->h_proto, ip6h->nexthdr); return TC_ACT_OK; - } else if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) { + } else if (bpf_skb_under_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) { bpf_trace_printk(pass_msg, sizeof(pass_msg)); return TC_ACT_OK; } else { -- cgit v0.10.2 From b4c0e0c61f81dedc82dda35c287ea149ff98b434 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 11 Aug 2016 18:17:22 +0100 Subject: calipso: fix resource leak on calipso_genopt failure Currently, if calipso_genopt fails then the error exit path does not free the ipv6_opt_hdr new causing a memory leak. Fix this by kfree'ing new on the error exit path. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index c53b92c..37ac9de 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -952,8 +952,10 @@ calipso_opt_insert(struct ipv6_opt_hdr *hop, memcpy(new, hop, start); ret_val = calipso_genopt((unsigned char *)new, start, buf_len, doi_def, secattr); - if (ret_val < 0) + if (ret_val < 0) { + kfree(new); return ERR_PTR(ret_val); + } buf_len = start + ret_val; /* At this point buf_len aligns to 4n, so (buf_len & 4) pads to 8n */ -- cgit v0.10.2 From d7005652cd31dfc5660e1e32bf7e53538ef14987 Mon Sep 17 00:00:00 2001 From: "sean.wang@mediatek.com" Date: Sat, 13 Aug 2016 19:16:18 +0800 Subject: net: ethernet: mediatek: fixed that initializing u64_stats_sync is missing To fix runtime warning with lockdep is enabled due that u64_stats_sync is not initialized well, so add it. Signed-off-by: Sean Wang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index b57ae3a..fe17f8c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1751,6 +1751,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) goto free_netdev; } spin_lock_init(&mac->hw_stats->stats_lock); + u64_stats_init(&mac->hw_stats->syncp); mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET; SET_NETDEV_DEV(eth->netdev[id], eth->dev); -- cgit v0.10.2 From e8c2993a4c9fdb0c9e6fc983edd5b52716ce7442 Mon Sep 17 00:00:00 2001 From: "sean.wang@mediatek.com" Date: Sat, 13 Aug 2016 19:16:19 +0800 Subject: net: ethernet: mediatek: add the missing of_node_put() after node is used done This patch adds the missing of_node_put() after finishing the usage of of_parse_phandle() or of_node_get() used by fixed_phy. Signed-off-by: Sean Wang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index fe17f8c..0030361 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -269,6 +269,8 @@ static int mtk_phy_connect(struct mtk_mac *mac) ADVERTISED_Autoneg; phy_start_aneg(mac->phy_dev); + of_node_put(np); + return 0; } -- cgit v0.10.2 From 0ed661d5a48fa6df0b50ae64d27fe759a3ce42cf Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 11 Aug 2016 21:38:37 +0200 Subject: bpf: fix write helpers with regards to non-linear parts Fix the bpf_try_make_writable() helper and all call sites we have in BPF, it's currently defect with regards to skbs when the write_len spans into non-linear parts, no matter if cloned or not. There are multiple issues at once. First, using skb_store_bits() is not correct since even if we have a cloned skb, page frags can still be shared. To really make them private, we need to pull them in via __pskb_pull_tail() first, which also gets us a private head via pskb_expand_head() implicitly. This is for helpers like bpf_skb_store_bytes(), bpf_l3_csum_replace(), bpf_l4_csum_replace(). Really, the only thing reasonable and working here is to call skb_ensure_writable() before any write operation. Meaning, via pskb_may_pull() it makes sure that parts we want to access are pulled in and if not does so plus unclones the skb implicitly. If our write_len still fits the headlen and we're cloned and our header of the clone is not writable, then we need to make a private copy via pskb_expand_head(). skb_store_bits() is a bit misleading and only safe to store into non-linear data in different contexts such as 357b40a18b04 ("[IPV6]: IPV6_CHECKSUM socket option can corrupt kernel memory"). For above BPF helper functions, it means after fixed bpf_try_make_writable(), we've pulled in enough, so that we operate always based on skb->data. Thus, the call to skb_header_pointer() and skb_store_bits() becomes superfluous. In bpf_skb_store_bytes(), the len check is unnecessary too since it can only pass in maximum of BPF stack size, so adding offset is guaranteed to never overflow. Also bpf_l3/4_csum_replace() helpers must test for proper offset alignment since they use __sum16 pointer for writing resulting csum. The remaining helpers that change skb data not discussed here yet are bpf_skb_vlan_push(), bpf_skb_vlan_pop() and bpf_skb_change_proto(). The vlan helpers internally call either skb_ensure_writable() (pop case) and skb_cow_head() (push case, for head expansion), respectively. Similarly, bpf_skb_proto_xlat() takes care to not mangle page frags. Fixes: 608cd71a9c7c ("tc: bpf: generalize pedit action") Fixes: 91bc4822c3d6 ("tc: bpf: add checksum helpers") Fixes: 3697649ff29e ("bpf: try harder on clones when writing into skb") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller diff --git a/net/core/filter.c b/net/core/filter.c index bd9bf2e..cb06ace 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1355,13 +1355,9 @@ static inline int bpf_try_make_writable(struct sk_buff *skb, { int err; - if (!skb_cloned(skb)) - return 0; - if (skb_clone_writable(skb, write_len)) - return 0; - err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (!err) - bpf_compute_data_end(skb); + err = skb_ensure_writable(skb, write_len); + bpf_compute_data_end(skb); + return err; } @@ -1379,42 +1375,25 @@ static inline void bpf_pull_mac_rcsum(struct sk_buff *skb) static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { - struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); struct sk_buff *skb = (struct sk_buff *) (long) r1; - int offset = (int) r2; + unsigned int offset = (unsigned int) r2; void *from = (void *) (long) r3; unsigned int len = (unsigned int) r4; void *ptr; if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) return -EINVAL; - - /* bpf verifier guarantees that: - * 'from' pointer points to bpf program stack - * 'len' bytes of it were initialized - * 'len' > 0 - * 'skb' is a valid pointer to 'struct sk_buff' - * - * so check for invalid 'offset' and too large 'len' - */ - if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff))) + if (unlikely(offset > 0xffff)) return -EFAULT; if (unlikely(bpf_try_make_writable(skb, offset + len))) return -EFAULT; - ptr = skb_header_pointer(skb, offset, len, sp->buff); - if (unlikely(!ptr)) - return -EFAULT; - + ptr = skb->data + offset; if (flags & BPF_F_RECOMPUTE_CSUM) __skb_postpull_rcsum(skb, ptr, len, offset); memcpy(ptr, from, len); - if (ptr == sp->buff) - /* skb_store_bits cannot return -EFAULT here */ - skb_store_bits(skb, offset, ptr, len); - if (flags & BPF_F_RECOMPUTE_CSUM) __skb_postpush_rcsum(skb, ptr, len, offset); if (flags & BPF_F_INVALIDATE_HASH) @@ -1437,12 +1416,12 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = { static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1; - int offset = (int) r2; + unsigned int offset = (unsigned int) r2; void *to = (void *)(unsigned long) r3; unsigned int len = (unsigned int) r4; void *ptr; - if (unlikely((u32) offset > 0xffff)) + if (unlikely(offset > 0xffff)) goto err_clear; ptr = skb_header_pointer(skb, offset, len, to); @@ -1470,20 +1449,17 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; - int offset = (int) r2; - __sum16 sum, *ptr; + unsigned int offset = (unsigned int) r2; + __sum16 *ptr; if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK))) return -EINVAL; - if (unlikely((u32) offset > 0xffff)) - return -EFAULT; - if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum)))) + if (unlikely(offset > 0xffff || offset & 1)) return -EFAULT; - - ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); - if (unlikely(!ptr)) + if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) return -EFAULT; + ptr = (__sum16 *)(skb->data + offset); switch (flags & BPF_F_HDR_FIELD_MASK) { case 0: if (unlikely(from != 0)) @@ -1501,10 +1477,6 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) return -EINVAL; } - if (ptr == &sum) - /* skb_store_bits guaranteed to not return -EFAULT here */ - skb_store_bits(skb, offset, ptr, sizeof(sum)); - return 0; } @@ -1524,20 +1496,18 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) struct sk_buff *skb = (struct sk_buff *) (long) r1; bool is_pseudo = flags & BPF_F_PSEUDO_HDR; bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; - int offset = (int) r2; - __sum16 sum, *ptr; + unsigned int offset = (unsigned int) r2; + __sum16 *ptr; if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) return -EINVAL; - if (unlikely((u32) offset > 0xffff)) + if (unlikely(offset > 0xffff || offset & 1)) return -EFAULT; - if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum)))) + if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) return -EFAULT; - ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); - if (unlikely(!ptr)) - return -EFAULT; + ptr = (__sum16 *)(skb->data + offset); if (is_mmzero && !*ptr) return 0; @@ -1560,10 +1530,6 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) if (is_mmzero && !*ptr) *ptr = CSUM_MANGLED_0; - if (ptr == &sum) - /* skb_store_bits guaranteed to not return -EFAULT here */ - skb_store_bits(skb, offset, ptr, sizeof(sum)); - return 0; } -- cgit v0.10.2 From c15c0ab12fd62f2b19181d05c62d24bc9fa55a42 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 12 Aug 2016 07:48:21 +0200 Subject: ipv6: suppress sparse warnings in IP6_ECN_set_ce() Pass the correct type __wsum to csum_sub() and csum_add(). This doesn't really change anything since __wsum really *is* __be32, but removes the address space warnings from sparse. Cc: Eric Dumazet Fixes: 34ae6a1aa054 ("ipv6: update skb->csum when CE mark is propagated") Signed-off-by: Johannes Berg Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 0dc0a51..dce2d58 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -128,7 +128,8 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) to = from | htonl(INET_ECN_CE << 20); *(__be32 *)iph = to; if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(csum_sub(skb->csum, from), to); + skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), + (__force __wsum)to); return 1; } -- cgit v0.10.2 From 5ba092efc7ddff040777ae7162f1d195f513571b Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 12 Aug 2016 10:29:13 +0200 Subject: net/irda: handle iriap_register_lsap() allocation failure If iriap_register_lsap() fails to allocate memory, self->lsap is set to NULL. However, none of the callers handle the failure and irlmp_connect_request() will happily dereference it: iriap_register_lsap: Unable to allocated LSAP! ================================================================================ UBSAN: Undefined behaviour in net/irda/irlmp.c:378:2 member access within null pointer of type 'struct lsap_cb' CPU: 1 PID: 15403 Comm: trinity-c0 Not tainted 4.8.0-rc1+ #81 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 0000000000000000 ffff88010c7e78a8 ffffffff82344f40 0000000041b58ab3 ffffffff84f98000 ffffffff82344e94 ffff88010c7e78d0 ffff88010c7e7880 ffff88010630ad00 ffffffff84a5fae0 ffffffff84d3f5c0 000000000000017a Call Trace: [] dump_stack+0xac/0xfc [] ubsan_epilogue+0xd/0x8a [] __ubsan_handle_type_mismatch+0x157/0x411 [] irlmp_connect_request+0x7ac/0x970 [] iriap_connect_request+0xa0/0x160 [] state_s_disconnect+0x88/0xd0 [] iriap_do_client_event+0x94/0x120 [] iriap_getvaluebyclass_request+0x3e0/0x6d0 [] irda_find_lsap_sel+0x1eb/0x630 [] irda_connect+0x828/0x12d0 [] SYSC_connect+0x22b/0x340 [] SyS_connect+0x9/0x10 [] do_syscall_64+0x1b3/0x4b0 [] entry_SYSCALL64_slow_path+0x25/0x25 ================================================================================ The bug seems to have been around since forever. There's more problems with missing error checks in iriap_init() (and indeed all of irda_init()), but that's a bigger problem that needs very careful review and testing. This patch will fix the most serious bug (as it's easily reached from unprivileged userspace). I have tested my patch with a reproducer. Signed-off-by: Vegard Nossum Signed-off-by: David S. Miller diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 4a7ae32a..1138eaf 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -185,8 +185,12 @@ struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv, self->magic = IAS_MAGIC; self->mode = mode; - if (mode == IAS_CLIENT) - iriap_register_lsap(self, slsap_sel, mode); + if (mode == IAS_CLIENT) { + if (iriap_register_lsap(self, slsap_sel, mode)) { + kfree(self); + return NULL; + } + } self->confirm = callback; self->priv = priv; -- cgit v0.10.2 From 54236ab09e9696a27baaae693c288920a26e8588 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 12 Aug 2016 09:50:51 +0200 Subject: net/sctp: always initialise sctp_ht_iter::start_fail sctp_transport_seq_start() does not currently clear iter->start_fail on success, but relies on it being zero when it is allocated (by seq_open_net()). This can be a problem in the following sequence: open() // allocates iter (and implicitly sets iter->start_fail = 0) read() - iter->start() // fails and sets iter->start_fail = 1 - iter->stop() // doesn't call sctp_transport_walk_stop() (correct) read() again - iter->start() // succeeds, but doesn't change iter->start_fail - iter->stop() // doesn't call sctp_transport_walk_stop() (wrong) We should initialize sctp_ht_iter::start_fail to zero if ->start() succeeds, otherwise it's possible that we leave an old value of 1 there, which will cause ->stop() to not call sctp_transport_walk_stop(), which causes all sorts of problems like not calling rcu_read_unlock() (and preempt_enable()), eventually leading to more warnings like this: BUG: sleeping function called from invalid context at mm/slab.h:388 in_atomic(): 0, irqs_disabled(): 0, pid: 16551, name: trinity-c2 Preemption disabled at:[] rhashtable_walk_start+0x46/0x150 [] preempt_count_add+0x1fb/0x280 [] _raw_spin_lock+0x12/0x40 [] rhashtable_walk_start+0x46/0x150 [] sctp_transport_walk_start+0x2f/0x60 [] sctp_transport_seq_start+0x4d/0x150 [] traverse+0x170/0x850 [] seq_read+0x7cc/0x1180 [] proc_reg_read+0xbc/0x180 [] do_loop_readv_writev+0x134/0x210 [] do_readv_writev+0x565/0x660 [] vfs_readv+0x67/0xa0 [] do_preadv+0x126/0x170 [] SyS_preadv+0xc/0x10 [] do_syscall_64+0x19c/0x410 [] return_from_SYSCALL_64+0x0/0x6a [] 0xffffffffffffffff Notice that this is a subtly different stacktrace from the one in commit 5fc382d875 ("net/sctp: terminate rhashtable walk correctly"). Cc: Xin Long Cc: Herbert Xu Cc: Eric W. Biederman Cc: Marcelo Ricardo Leitner Signed-off-by: Vegard Nossum Acked-By: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 4cb5aed..ef8ba77 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -293,6 +293,7 @@ static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos) return ERR_PTR(err); } + iter->start_fail = 0; return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos); } -- cgit v0.10.2 From bc561632dddd5af0c4444d919f01cbf6d553aa0a Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Fri, 12 Aug 2016 12:02:38 +0100 Subject: net: ipv6: Do not keep IPv6 addresses when IPv6 is disabled If IPv6 is disabled when the option is set to keep IPv6 addresses on link down, userspace is unaware of this as there is no such indication via netlink. The solution is to remove the IPv6 addresses in this case, which results in netlink messages indicating removal of addresses in the usual manner. This fix also makes the behavior consistent with the case of having IPv6 disabled first, which stops IPv6 addresses from being added. Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional") Signed-off-by: Mike Manning Acked-by: David Ahern Signed-off-by: David S. Miller diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ab3e796..df8425f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3543,7 +3543,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* combine the user config with event to determine if permanent * addresses are to be removed from address hash table */ - keep_addr = !(how || _keep_addr <= 0); + keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6); /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { @@ -3599,7 +3599,7 @@ restart: /* re-combine the user config with event to determine if permanent * addresses are to be removed from the interface list */ - keep_addr = (!how && _keep_addr > 0); + keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6); INIT_LIST_HEAD(&del_list); list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { -- cgit v0.10.2 From e20038724552cd05e351cd7d7526d646953d26b7 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 12 Aug 2016 16:10:32 +0200 Subject: macsec: fix lockdep splats when nesting devices Currently, trying to setup a vlan over a macsec device, or other combinations of devices, triggers a lockdep warning. Use netdev_lockdep_set_classes and ndo_get_lock_subclass, similar to what macvlan does. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index dbd590a..2043e8c 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -270,6 +270,7 @@ struct macsec_dev { struct pcpu_secy_stats __percpu *stats; struct list_head secys; struct gro_cells gro_cells; + unsigned int nest_level; }; /** @@ -2699,6 +2700,8 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, #define MACSEC_FEATURES \ (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST) +static struct lock_class_key macsec_netdev_addr_lock_key; + static int macsec_dev_init(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); @@ -2910,6 +2913,13 @@ static int macsec_get_iflink(const struct net_device *dev) return macsec_priv(dev)->real_dev->ifindex; } + +static int macsec_get_nest_level(struct net_device *dev) +{ + return macsec_priv(dev)->nest_level; +} + + static const struct net_device_ops macsec_netdev_ops = { .ndo_init = macsec_dev_init, .ndo_uninit = macsec_dev_uninit, @@ -2923,6 +2933,7 @@ static const struct net_device_ops macsec_netdev_ops = { .ndo_start_xmit = macsec_start_xmit, .ndo_get_stats64 = macsec_get_stats64, .ndo_get_iflink = macsec_get_iflink, + .ndo_get_lock_subclass = macsec_get_nest_level, }; static const struct device_type macsec_type = { @@ -3050,10 +3061,12 @@ static void macsec_del_dev(struct macsec_dev *macsec) static void macsec_common_dellink(struct net_device *dev, struct list_head *head) { struct macsec_dev *macsec = macsec_priv(dev); + struct net_device *real_dev = macsec->real_dev; unregister_netdevice_queue(dev, head); list_del_rcu(&macsec->secys); macsec_del_dev(macsec); + netdev_upper_dev_unlink(real_dev, dev); macsec_generation++; } @@ -3188,6 +3201,16 @@ static int macsec_newlink(struct net *net, struct net_device *dev, dev_hold(real_dev); + macsec->nest_level = dev_get_nest_level(real_dev, netif_is_macsec) + 1; + netdev_lockdep_set_classes(dev); + lockdep_set_class_and_subclass(&dev->addr_list_lock, + &macsec_netdev_addr_lock_key, + macsec_get_nest_level(dev)); + + err = netdev_upper_dev_link(real_dev, dev); + if (err < 0) + goto unregister; + /* need to be already registered so that ->init has run and * the MAC addr is set */ @@ -3200,12 +3223,12 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (rx_handler && sci_exists(real_dev, sci)) { err = -EBUSY; - goto unregister; + goto unlink; } err = macsec_add_dev(dev, sci, icv_len); if (err) - goto unregister; + goto unlink; if (data) macsec_changelink_common(dev, data); @@ -3220,6 +3243,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev, del_dev: macsec_del_dev(macsec); +unlink: + netdev_upper_dev_unlink(real_dev, dev); unregister: unregister_netdevice(dev); return err; -- cgit v0.10.2 From 952fcfd08c8109951622579d0ae7b9cd6cafd688 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 12 Aug 2016 16:10:33 +0200 Subject: net: remove type_check from dev_get_nest_level() The idea for type_check in dev_get_nest_level() was to count the number of nested devices of the same type (currently, only macvlan or vlan devices). This prevented the false positive lockdep warning on configurations such as: eth0 <--- macvlan0 <--- vlan0 <--- macvlan1 However, this doesn't prevent a warning on a configuration such as: eth0 <--- macvlan0 <--- vlan0 eth1 <--- vlan1 <--- macvlan1 In this case, all the locks end up with a nesting subclass of 1, so lockdep thinks that there is still a deadlock: - in the first case we have (macvlan_netdev_addr_lock_key, 1) and then take (vlan_netdev_xmit_lock_key, 1) - in the second case, we have (vlan_netdev_xmit_lock_key, 1) and then take (macvlan_netdev_addr_lock_key, 1) By removing the linktype check in dev_get_nest_level() and always incrementing the nesting depth, lockdep considers this configuration valid. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 2043e8c..351e701 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3201,7 +3201,7 @@ static int macsec_newlink(struct net *net, struct net_device *dev, dev_hold(real_dev); - macsec->nest_level = dev_get_nest_level(real_dev, netif_is_macsec) + 1; + macsec->nest_level = dev_get_nest_level(real_dev) + 1; netdev_lockdep_set_classes(dev); lockdep_set_class_and_subclass(&dev->addr_list_lock, &macsec_netdev_addr_lock_key, diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index cd9b538..3234fcd 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1315,7 +1315,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, vlan->dev = dev; vlan->port = port; vlan->set_features = MACVLAN_FEATURES; - vlan->nest_level = dev_get_nest_level(lowerdev, netif_is_macvlan) + 1; + vlan->nest_level = dev_get_nest_level(lowerdev) + 1; vlan->mode = MACVLAN_MODE_VEPA; if (data && data[IFLA_MACVLAN_MODE]) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 076df53..3a788bf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3891,8 +3891,7 @@ void netdev_default_l2upper_neigh_destroy(struct net_device *dev, extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; void netdev_rss_key_fill(void *buffer, size_t len); -int dev_get_nest_level(struct net_device *dev, - bool (*type_check)(const struct net_device *dev)); +int dev_get_nest_level(struct net_device *dev); int skb_checksum_help(struct sk_buff *skb); struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 82a116b..8de138d 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -169,7 +169,7 @@ int register_vlan_dev(struct net_device *dev) if (err < 0) goto out_uninit_mvrp; - vlan->nest_level = dev_get_nest_level(real_dev, is_vlan_dev) + 1; + vlan->nest_level = dev_get_nest_level(real_dev) + 1; err = register_netdevice(dev); if (err < 0) goto out_uninit_mvrp; diff --git a/net/core/dev.c b/net/core/dev.c index 4ce07dc..dd6ce59 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6045,8 +6045,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev, EXPORT_SYMBOL(netdev_lower_dev_get_private); -int dev_get_nest_level(struct net_device *dev, - bool (*type_check)(const struct net_device *dev)) +int dev_get_nest_level(struct net_device *dev) { struct net_device *lower = NULL; struct list_head *iter; @@ -6056,15 +6055,12 @@ int dev_get_nest_level(struct net_device *dev, ASSERT_RTNL(); netdev_for_each_lower_dev(dev, lower, iter) { - nest = dev_get_nest_level(lower, type_check); + nest = dev_get_nest_level(lower); if (max_nest < nest) max_nest = nest; } - if (type_check(dev)) - max_nest++; - - return max_nest; + return max_nest + 1; } EXPORT_SYMBOL(dev_get_nest_level); -- cgit v0.10.2 From d1669c8288a2c86daa6fd59c7fb938c08589d053 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 5 Aug 2016 11:19:43 +0200 Subject: doc-rst: customize RTD theme, drop padding of inline literal Remove the distracting (left/right) padding of inline literals. (HTML ). Requested and discussed in [1]. [1] http://www.spinics.net/lists/linux-media/msg103991.html Signed-off-by: Markus Heiser Acked-by: Hans Verkuil Signed-off-by: Jonathan Corbet diff --git a/Documentation/sphinx-static/theme_overrides.css b/Documentation/sphinx-static/theme_overrides.css index 3a2ac4b..e88461c 100644 --- a/Documentation/sphinx-static/theme_overrides.css +++ b/Documentation/sphinx-static/theme_overrides.css @@ -42,11 +42,12 @@ caption a.headerlink { opacity: 0; } caption a.headerlink:hover { opacity: 1; } - /* inline literal: drop the borderbox and red color */ + /* inline literal: drop the borderbox, padding and red color */ code, .rst-content tt, .rst-content code { color: inherit; border: none; + padding: unset; background: inherit; font-size: 85%; } -- cgit v0.10.2 From ce8cb803d8b90458495f23606c706f0c0c857cdc Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 12 Aug 2016 18:40:23 +0200 Subject: i2c: mux: demux-pinctrl: properly roll back when adding adapter fails We also need to revert the dynamic OF change, so we get a consistent state again. Otherwise, we might have two devices enabled e.g. after pinctrl setup fails. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang Cc: stable@kernel.org diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index 8de073a..215ac87 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -68,7 +68,7 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne adap = of_find_i2c_adapter_by_node(priv->chan[new_chan].parent_np); if (!adap) { ret = -ENODEV; - goto err; + goto err_with_revert; } p = devm_pinctrl_get_select(adap->dev.parent, priv->bus_name); @@ -103,6 +103,8 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne err_with_put: i2c_put_adapter(adap); + err_with_revert: + of_changeset_revert(&priv->chan[new_chan].chgset); err: dev_err(priv->dev, "failed to setup demux-adapter %d (%d)\n", new_chan, ret); return ret; -- cgit v0.10.2 From 4d01d88019261d05ec3bff5f1a6013393faa3b9e Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 10 Aug 2016 13:37:18 -0700 Subject: i2c: cros-ec-tunnel: Fix usage of cros_ec_cmd_xfer() cros_ec_cmd_xfer returns success status if the command transport completes successfully, but the execution result is incorrectly ignored. In many cases, the execution result is assumed to be successful, leading to ignored errors and operating on uninitialized data. We've recently introduced the cros_ec_cmd_xfer_status() helper to avoid these problems. Let's use it. [Regarding the 'Fixes' tag; there is significant refactoring since the driver's introduction, but the underlying logical error exists throughout I believe] Fixes: 9d230c9e4f4e ("i2c: ChromeOS EC tunnel driver") Cc: # 9798ac6d32c1 mfd: cros_ec: Add cros_ec_cmd_xfer_status() helper Signed-off-by: Brian Norris Reviewed-by: Javier Martinez Canillas Reviewed-by: Guenter Roeck Signed-off-by: Wolfram Sang diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c index a0d95ff..2d5ff863 100644 --- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c +++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c @@ -215,7 +215,7 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[], msg->outsize = request_len; msg->insize = response_len; - result = cros_ec_cmd_xfer(bus->ec, msg); + result = cros_ec_cmd_xfer_status(bus->ec, msg); if (result < 0) { dev_err(dev, "Error transferring EC i2c message %d\n", result); goto exit; -- cgit v0.10.2 From 97ccd4af120c6ee183f424f17672870809bd6efd Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Thu, 4 Aug 2016 02:38:44 +0300 Subject: i2c: ocores: add missed clk_disable_unprepare() on failure paths clk_disable_unprepare() is missed on failure paths in ocores_i2c_probe(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Acked-by: Peter Korsgaard Signed-off-by: Wolfram Sang diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c index dfa7a4b..ac88a52 100644 --- a/drivers/i2c/busses/i2c-ocores.c +++ b/drivers/i2c/busses/i2c-ocores.c @@ -379,6 +379,7 @@ static int ocores_i2c_of_probe(struct platform_device *pdev, if (!clock_frequency_present) { dev_err(&pdev->dev, "Missing required parameter 'opencores,ip-clock-frequency'\n"); + clk_disable_unprepare(i2c->clk); return -ENODEV; } i2c->ip_clock_khz = clock_frequency / 1000; @@ -467,20 +468,21 @@ static int ocores_i2c_probe(struct platform_device *pdev) default: dev_err(&pdev->dev, "Unsupported I/O width (%d)\n", i2c->reg_io_width); - return -EINVAL; + ret = -EINVAL; + goto err_clk; } } ret = ocores_init(&pdev->dev, i2c); if (ret) - return ret; + goto err_clk; init_waitqueue_head(&i2c->wait); ret = devm_request_irq(&pdev->dev, irq, ocores_isr, 0, pdev->name, i2c); if (ret) { dev_err(&pdev->dev, "Cannot claim IRQ\n"); - return ret; + goto err_clk; } /* hook up driver to tree */ @@ -494,7 +496,7 @@ static int ocores_i2c_probe(struct platform_device *pdev) ret = i2c_add_adapter(&i2c->adap); if (ret) { dev_err(&pdev->dev, "Failed to add adapter\n"); - return ret; + goto err_clk; } /* add in known devices to the bus */ @@ -504,6 +506,10 @@ static int ocores_i2c_probe(struct platform_device *pdev) } return 0; + +err_clk: + clk_disable_unprepare(i2c->clk); + return ret; } static int ocores_i2c_remove(struct platform_device *pdev) -- cgit v0.10.2 From 434f14e745442a4230a40cf47f84deac1028f64f Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Wed, 3 Aug 2016 16:58:26 +0200 Subject: i2c: at91: fix support of the "alternative command" feature The "alternative command" feature was introduced with sama5d2 SoCs. Its purpose is to let the hardware i2c controller automatically send the STOP condition on the i2c bus at the end of a data transfer. Without this feature, the i2c driver has to write the 'STOP' bit into the Control Register so the hardware i2c controller is triggered to send the STOP condition on the bus. Using the "alternative command" feature requires to set the transfer data length into the 8bit DATAL field of the Alternative Command Register. Hence only data transfers up to 255 bytes can take advantage of the "alternative command" feature. For greater data transfer sizes, the driver should use the previous implementation, when the "alternative command" support was not implemented yet. Signed-off-by: Cyrille Pitchen Signed-off-by: Ludovic Desroches Signed-off-by: Wolfram Sang diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c index f233726..1bb97f6 100644 --- a/drivers/i2c/busses/i2c-at91.c +++ b/drivers/i2c/busses/i2c-at91.c @@ -38,6 +38,7 @@ #define AT91_I2C_TIMEOUT msecs_to_jiffies(100) /* transfer timeout */ #define AT91_I2C_DMA_THRESHOLD 8 /* enable DMA if transfer size is bigger than this threshold */ #define AUTOSUSPEND_TIMEOUT 2000 +#define AT91_I2C_MAX_ALT_CMD_DATA_SIZE 256 /* AT91 TWI register definitions */ #define AT91_TWI_CR 0x0000 /* Control Register */ @@ -141,6 +142,7 @@ struct at91_twi_dev { unsigned twi_cwgr_reg; struct at91_twi_pdata *pdata; bool use_dma; + bool use_alt_cmd; bool recv_len_abort; u32 fifo_size; struct at91_twi_dma dma; @@ -269,7 +271,7 @@ static void at91_twi_write_next_byte(struct at91_twi_dev *dev) /* send stop when last byte has been written */ if (--dev->buf_len == 0) - if (!dev->pdata->has_alt_cmd) + if (!dev->use_alt_cmd) at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP); dev_dbg(dev->dev, "wrote 0x%x, to go %d\n", *dev->buf, dev->buf_len); @@ -292,7 +294,7 @@ static void at91_twi_write_data_dma_callback(void *data) * we just have to enable TXCOMP one. */ at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_TXCOMP); - if (!dev->pdata->has_alt_cmd) + if (!dev->use_alt_cmd) at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP); } @@ -410,7 +412,7 @@ static void at91_twi_read_next_byte(struct at91_twi_dev *dev) } /* send stop if second but last byte has been read */ - if (!dev->pdata->has_alt_cmd && dev->buf_len == 1) + if (!dev->use_alt_cmd && dev->buf_len == 1) at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP); dev_dbg(dev->dev, "read 0x%x, to go %d\n", *dev->buf, dev->buf_len); @@ -426,7 +428,7 @@ static void at91_twi_read_data_dma_callback(void *data) dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg[0]), dev->buf_len, DMA_FROM_DEVICE); - if (!dev->pdata->has_alt_cmd) { + if (!dev->use_alt_cmd) { /* The last two bytes have to be read without using dma */ dev->buf += dev->buf_len - 2; dev->buf_len = 2; @@ -443,7 +445,7 @@ static void at91_twi_read_data_dma(struct at91_twi_dev *dev) struct dma_chan *chan_rx = dma->chan_rx; size_t buf_len; - buf_len = (dev->pdata->has_alt_cmd) ? dev->buf_len : dev->buf_len - 2; + buf_len = (dev->use_alt_cmd) ? dev->buf_len : dev->buf_len - 2; dma->direction = DMA_FROM_DEVICE; /* Keep in mind that we won't use dma to read the last two bytes */ @@ -651,7 +653,7 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev) unsigned start_flags = AT91_TWI_START; /* if only one byte is to be read, immediately stop transfer */ - if (!has_alt_cmd && dev->buf_len <= 1 && + if (!dev->use_alt_cmd && dev->buf_len <= 1 && !(dev->msg->flags & I2C_M_RECV_LEN)) start_flags |= AT91_TWI_STOP; at91_twi_write(dev, AT91_TWI_CR, start_flags); @@ -745,7 +747,7 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num) int ret; unsigned int_addr_flag = 0; struct i2c_msg *m_start = msg; - bool is_read, use_alt_cmd = false; + bool is_read; dev_dbg(&adap->dev, "at91_xfer: processing %d messages:\n", num); @@ -768,14 +770,16 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num) at91_twi_write(dev, AT91_TWI_IADR, internal_address); } + dev->use_alt_cmd = false; is_read = (m_start->flags & I2C_M_RD); if (dev->pdata->has_alt_cmd) { - if (m_start->len > 0) { + if (m_start->len > 0 && + m_start->len < AT91_I2C_MAX_ALT_CMD_DATA_SIZE) { at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_ACMEN); at91_twi_write(dev, AT91_TWI_ACR, AT91_TWI_ACR_DATAL(m_start->len) | ((is_read) ? AT91_TWI_ACR_DIR : 0)); - use_alt_cmd = true; + dev->use_alt_cmd = true; } else { at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_ACMDIS); } @@ -784,7 +788,7 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num) at91_twi_write(dev, AT91_TWI_MMR, (m_start->addr << 16) | int_addr_flag | - ((!use_alt_cmd && is_read) ? AT91_TWI_MREAD : 0)); + ((!dev->use_alt_cmd && is_read) ? AT91_TWI_MREAD : 0)); dev->buf_len = m_start->len; dev->buf = m_start->buf; -- cgit v0.10.2 From 8d263be8716da67bd8746e266c1934cb9c9933cc Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 3 Aug 2016 14:03:08 +0200 Subject: i2c: bcm-iproc: Use complete() instead of complete_all() There is only one waiter for the completion, therefore there is no need to use complete_all(). Let's make that clear by using complete() instead of complete_all(). The usage pattern of the completion is: bcm_iproc_i2c_xfer_single_msg() reinit_completion() ... (activate the transfer) ... wait_for_completion_timeout() Signed-off-by: Daniel Wagner Acked-by: Ray Jui Signed-off-by: Wolfram Sang diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index 19c8438..95f7cac 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -158,7 +158,7 @@ static irqreturn_t bcm_iproc_i2c_isr(int irq, void *data) if (status & BIT(IS_M_START_BUSY_SHIFT)) { iproc_i2c->xfer_is_done = 1; - complete_all(&iproc_i2c->done); + complete(&iproc_i2c->done); } writel(status, iproc_i2c->base + IS_OFFSET); -- cgit v0.10.2 From 752c3899f97c469efea42b1ad6de66d18a21ce09 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 3 Aug 2016 14:03:09 +0200 Subject: i2c: bcm-kona: Use complete() instead of complete_all() There is only one waiter for the completion, therefore there is no need to use complete_all(). Let's make that clear by using complete() instead of complete_all(). The usage pattern of the completion is: bcm_kona_send_i2c_cmd() reinit_completion() ... bcm_kona_i2c_send_cmd_to_ctrl() ... wait_for_completion_timeout() Signed-off-by: Daniel Wagner Acked-by: Ray Jui Reviewed-by: Tim Kryger Signed-off-by: Wolfram Sang diff --git a/drivers/i2c/busses/i2c-bcm-kona.c b/drivers/i2c/busses/i2c-bcm-kona.c index ac9f476..f987432 100644 --- a/drivers/i2c/busses/i2c-bcm-kona.c +++ b/drivers/i2c/busses/i2c-bcm-kona.c @@ -229,7 +229,7 @@ static irqreturn_t bcm_kona_i2c_isr(int irq, void *devid) dev->base + TXFCR_OFFSET); writel(status & ~ISR_RESERVED_MASK, dev->base + ISR_OFFSET); - complete_all(&dev->done); + complete(&dev->done); return IRQ_HANDLED; } -- cgit v0.10.2 From fea03a6ab1d8e456c0319c194150621629504f80 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 3 Aug 2016 14:03:10 +0200 Subject: i2c: brcmstb: Use complete() instead of complete_all() There is only one waiter for the completion, therefore there is no need to use complete_all(). Let's make that clear by using complete() instead of complete_all(). The usage pattern of the completion is: brcmstb_send_i2c_cmd() reinit_completion() ... /* initiate transfer by setting iic_enable */ ... brcmstb_i2c_wait_for_completion() Signed-off-by: Daniel Wagner Reviewed-by: Kamal Dasu Signed-off-by: Wolfram Sang diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c index 3f5a4d7..385b57b 100644 --- a/drivers/i2c/busses/i2c-brcmstb.c +++ b/drivers/i2c/busses/i2c-brcmstb.c @@ -228,7 +228,7 @@ static irqreturn_t brcmstb_i2c_isr(int irq, void *devid) return IRQ_NONE; brcmstb_i2c_enable_disable_irq(dev, INT_DISABLE); - complete_all(&dev->done); + complete(&dev->done); dev_dbg(dev->device, "isr handled"); return IRQ_HANDLED; -- cgit v0.10.2 From 0268263f0cf2239eb3a747936d8d36ef5f5e1688 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 3 Aug 2016 14:03:11 +0200 Subject: i2c: meson: Use complete() instead of complete_all() There is only one waiter for the completion, therefore there is no need to use complete_all(). Let's make that clear by using complete() instead of complete_all(). The usage pattern of the completion is: meson_i2c_xfer_msg() reinit_completion() ... /* Start the transfer */ ... wait_for_completion_timeout() Signed-off-by: Daniel Wagner Signed-off-by: Wolfram Sang diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c index 71d3929..76e2898 100644 --- a/drivers/i2c/busses/i2c-meson.c +++ b/drivers/i2c/busses/i2c-meson.c @@ -211,7 +211,7 @@ static void meson_i2c_stop(struct meson_i2c *i2c) meson_i2c_add_token(i2c, TOKEN_STOP); } else { i2c->state = STATE_IDLE; - complete_all(&i2c->done); + complete(&i2c->done); } } @@ -238,7 +238,7 @@ static irqreturn_t meson_i2c_irq(int irqno, void *dev_id) dev_dbg(i2c->dev, "error bit set\n"); i2c->error = -ENXIO; i2c->state = STATE_IDLE; - complete_all(&i2c->done); + complete(&i2c->done); goto out; } @@ -269,7 +269,7 @@ static irqreturn_t meson_i2c_irq(int irqno, void *dev_id) break; case STATE_STOP: i2c->state = STATE_IDLE; - complete_all(&i2c->done); + complete(&i2c->done); break; case STATE_IDLE: break; -- cgit v0.10.2 From 52012619e5a2ca0491426c3712fb9054692d4a3c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 14 Aug 2016 23:44:21 +0300 Subject: ringtest: test build fix Recent changes to ptr_ring broke the ringtest which lacks a likely() stub. Fix it up. Fixes: 982fb490c298896d15e9323a882f34a57c11ff56 ("ptr_ring: support zero length ring") Signed-off-by: Michael S. Tsirkin diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c index 68e4f9f..bd2ad1d 100644 --- a/tools/virtio/ringtest/ptr_ring.c +++ b/tools/virtio/ringtest/ptr_ring.c @@ -13,6 +13,7 @@ #define cache_line_size() SMP_CACHE_BYTES #define ____cacheline_aligned_in_smp __attribute__ ((aligned (SMP_CACHE_BYTES))) #define unlikely(x) (__builtin_expect(!!(x), 0)) +#define likely(x) (__builtin_expect(!!(x), 1)) #define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a)) typedef pthread_spinlock_t spinlock_t; -- cgit v0.10.2 From 21bc54fc0cdc31de72b57d2b3c79cf9c2b83cf39 Mon Sep 17 00:00:00 2001 From: Gerard Garcia Date: Wed, 10 Aug 2016 17:24:34 +0200 Subject: vhost/vsock: drop space available check for TX vq Remove unnecessary use of enable/disable callback notifications and the incorrect more space available check. The virtio_transport_tx_work handles when the TX virtqueue has more buffers available. Signed-off-by: Gerard Garcia Acked-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 699dfab..936d7ee 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -87,9 +87,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) vq = vsock->vqs[VSOCK_VQ_TX]; - /* Avoid unnecessary interrupts while we're processing the ring */ - virtqueue_disable_cb(vq); - for (;;) { struct virtio_vsock_pkt *pkt; struct scatterlist hdr, buf, *sgs[2]; @@ -99,7 +96,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) spin_lock_bh(&vsock->send_pkt_list_lock); if (list_empty(&vsock->send_pkt_list)) { spin_unlock_bh(&vsock->send_pkt_list_lock); - virtqueue_enable_cb(vq); break; } @@ -118,13 +114,13 @@ virtio_transport_send_pkt_work(struct work_struct *work) } ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL); + /* Usually this means that there is no more space available in + * the vq + */ if (ret < 0) { spin_lock_bh(&vsock->send_pkt_list_lock); list_add(&pkt->list, &vsock->send_pkt_list); spin_unlock_bh(&vsock->send_pkt_list_lock); - - if (!virtqueue_enable_cb(vq) && ret == -ENOSPC) - continue; /* retry now that we have more space */ break; } -- cgit v0.10.2 From 446374d7c7f89603d8151b56824e2cac85ed8e0d Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 15 Aug 2016 04:28:12 +0300 Subject: vhost/test: fix after swiotlb changes Signed-off-by: Michael S. Tsirkin diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 388eec4..97fb2f8 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -220,20 +220,20 @@ static long vhost_test_reset_owner(struct vhost_test *n) { void *priv = NULL; long err; - struct vhost_memory *memory; + struct vhost_umem *umem; mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); if (err) goto done; - memory = vhost_dev_reset_owner_prepare(); - if (!memory) { + umem = vhost_dev_reset_owner_prepare(); + if (!umem) { err = -ENOMEM; goto done; } vhost_test_stop(n, &priv); vhost_test_flush(n); - vhost_dev_reset_owner(&n->dev, memory); + vhost_dev_reset_owner(&n->dev, umem); done: mutex_unlock(&n->dev.mutex); return err; -- cgit v0.10.2 From 6be3ffaa0e15c64f560904b025f5c50bef5886f9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 15 Aug 2016 04:50:55 +0300 Subject: tools/virtio: add dma stubs Fixes build after recent IOMMU-related changes, mustly by adding more stubs. Signed-off-by: Michael S. Tsirkin diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h index 4f93af8..18601f6 100644 --- a/tools/virtio/linux/dma-mapping.h +++ b/tools/virtio/linux/dma-mapping.h @@ -14,4 +14,20 @@ enum dma_data_direction { DMA_NONE = 3, }; +#define dma_alloc_coherent(d, s, hp, f) ({ \ + void *__dma_alloc_coherent_p = kmalloc((s), (f)); \ + *(hp) = (unsigned long)__dma_alloc_coherent_p; \ + __dma_alloc_coherent_p; \ +}) + +#define dma_free_coherent(d, s, p, h) kfree(p) + +#define dma_map_page(d, p, o, s, dir) (page_to_phys(p) + (o)) + +#define dma_map_single(d, p, s, dir) (virt_to_phys(p)) +#define dma_mapping_error(...) (0) + +#define dma_unmap_single(...) do { } while (0) +#define dma_unmap_page(...) do { } while (0) + #endif diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 0338499..d9554fc 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -20,7 +20,9 @@ #define PAGE_SIZE getpagesize() #define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_ALIGN(x) ((x + PAGE_SIZE - 1) & PAGE_MASK) +typedef unsigned long long phys_addr_t; typedef unsigned long long dma_addr_t; typedef size_t __kernel_size_t; typedef unsigned int __wsum; @@ -57,6 +59,11 @@ static inline void *kzalloc(size_t s, gfp_t gfp) return p; } +static inline void *alloc_pages_exact(size_t s, gfp_t gfp) +{ + return kmalloc(s, gfp); +} + static inline void kfree(void *p) { if (p >= __kfree_ignore_start && p < __kfree_ignore_end) @@ -64,6 +71,11 @@ static inline void kfree(void *p) free(p); } +static inline void free_pages_exact(void *p, size_t s) +{ + kfree(p); +} + static inline void *krealloc(void *p, size_t s, gfp_t gfp) { return realloc(p, s); @@ -105,6 +117,8 @@ static inline void free_page(unsigned long addr) #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) +#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n")) + #define min(x, y) ({ \ typeof(x) _min1 = (x); \ typeof(y) _min2 = (y); \ diff --git a/tools/virtio/linux/slab.h b/tools/virtio/linux/slab.h index 81baeac..7e1c119 100644 --- a/tools/virtio/linux/slab.h +++ b/tools/virtio/linux/slab.h @@ -1,2 +1,6 @@ #ifndef LINUX_SLAB_H +#define GFP_KERNEL 0 +#define GFP_ATOMIC 0 +#define __GFP_NOWARN 0 +#define __GFP_ZERO 0 #endif diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index ee125e7..9377c8b 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -3,8 +3,12 @@ #include #include +struct device { + void *parent; +}; + struct virtio_device { - void *dev; + struct device dev; u64 features; }; diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h index 57a6964..9ba1181 100644 --- a/tools/virtio/linux/virtio_config.h +++ b/tools/virtio/linux/virtio_config.h @@ -40,6 +40,19 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev, #define virtio_has_feature(dev, feature) \ (__virtio_test_bit((dev), feature)) +/** + * virtio_has_iommu_quirk - determine whether this device has the iommu quirk + * @vdev: the device + */ +static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev) +{ + /* + * Note the reverse polarity of the quirk feature (compared to most + * other features), this is for compatibility with legacy systems. + */ + return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); +} + static inline bool virtio_is_little_endian(struct virtio_device *vdev) { return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) || -- cgit v0.10.2 From 1c8d477a77e2d1d3504419e7f2e02e6422becf9a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 14 Aug 2016 12:47:49 -0400 Subject: pNFS/flexfiles: Fix layoutstat periodic reporting Putting the periodicity timer in the mirror instances is causing non-scalable reporting behaviour and missed reporting intervals. When you recall layouts and/or implement client side mirroring, it leads to consecutive reports with only a few ms between RPC calls. Signed-off-by: Trond Myklebust Fixes: d0379a5d066a9 ("pNFS/flexfiles: Support server-supplied...") diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index e6206ea..ee1c94c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -37,6 +37,7 @@ ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) if (ffl) { INIT_LIST_HEAD(&ffl->error_list); INIT_LIST_HEAD(&ffl->mirrors); + ffl->last_report_time = ktime_get(); return &ffl->generic_hdr; } else return NULL; @@ -640,19 +641,18 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, { static const ktime_t notime = {0}; s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL; + struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout); nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now); if (ktime_equal(mirror->start_time, notime)) mirror->start_time = now; - if (ktime_equal(mirror->last_report_time, notime)) - mirror->last_report_time = now; if (mirror->report_interval != 0) report_interval = (s64)mirror->report_interval * 1000LL; else if (layoutstats_timer != 0) report_interval = (s64)layoutstats_timer * 1000LL; - if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= + if (ktime_to_ms(ktime_sub(now, ffl->last_report_time)) >= report_interval) { - mirror->last_report_time = now; + ffl->last_report_time = now; return true; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 1bcdb15..3ee0c9f 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -84,7 +84,6 @@ struct nfs4_ff_layout_mirror { struct nfs4_ff_layoutstat read_stat; struct nfs4_ff_layoutstat write_stat; ktime_t start_time; - ktime_t last_report_time; u32 report_interval; }; @@ -101,6 +100,7 @@ struct nfs4_flexfile_layout { struct pnfs_ds_commit_info commit_info; struct list_head mirrors; struct list_head error_list; /* nfs4_ff_layout_ds_err */ + ktime_t last_report_time; /* Layoutstat report times */ }; static inline struct nfs4_flexfile_layout * -- cgit v0.10.2 From 4cf0b354d92ee2c642532ee39e330f8f580fd985 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 12 Aug 2016 12:03:52 +0200 Subject: rhashtable: avoid large lock-array allocations Sander reports following splat after netfilter nat bysrc table got converted to rhashtable: swapper/0: page allocation failure: order:3, mode:0x2084020(GFP_ATOMIC|__GFP_COMP) CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.8.0-rc1 [..] [] warn_alloc_failed+0xdd/0x140 [] __alloc_pages_nodemask+0x3e1/0xcf0 [] alloc_pages_current+0x8d/0x110 [] kmalloc_order+0x1f/0x70 [] __kmalloc+0x129/0x140 [] bucket_table_alloc+0xc1/0x1d0 [] rhashtable_insert_rehash+0x5d/0xe0 [] nf_nat_setup_info+0x2ef/0x400 The failure happens when allocating the spinlock array. Even with GFP_KERNEL its unlikely for such a large allocation to succeed. Thomas Graf pointed me at inet_ehash_locks_alloc(), so in addition to adding NOWARN for atomic allocations this also makes the bucket-array sizing more conservative. In commit 095dc8e0c3686 ("tcp: fix/cleanup inet_ehash_locks_alloc()"), Eric Dumazet says: "Budget 2 cache lines per cpu worth of 'spinlocks'". IOW, consider size needed by a single spinlock when determining number of locks per cpu. So with 64 byte per cacheline and 4 byte per spinlock this gives 32 locks per cpu. Resulting size of the lock-array (sizeof(spinlock) == 4): cpus: 1 2 4 8 16 32 64 old: 1k 1k 4k 8k 16k 16k 16k new: 128 256 512 1k 2k 4k 8k 8k allocation should have decent chance of success even with GFP_ATOMIC, and should not fail with GFP_KERNEL. With 72-byte spinlock (LOCKDEP): cpus : 1 2 old: 9k 18k new: ~2k ~4k Reported-by: Sander Eikelenboom Suggested-by: Thomas Graf Signed-off-by: Florian Westphal Signed-off-by: David S. Miller diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 5d845ff..42acd81 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -30,7 +30,7 @@ #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4U -#define BUCKET_LOCKS_PER_CPU 128UL +#define BUCKET_LOCKS_PER_CPU 32UL static u32 head_hashfn(struct rhashtable *ht, const struct bucket_table *tbl, @@ -70,7 +70,7 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, unsigned int nr_pcpus = num_possible_cpus(); #endif - nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL); + nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL); size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul); /* Never allocate more than 0.5 locks per bucket */ @@ -83,6 +83,9 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, tbl->locks = vmalloc(size * sizeof(spinlock_t)); else #endif + if (gfp != GFP_KERNEL) + gfp |= __GFP_NOWARN | __GFP_NORETRY; + tbl->locks = kmalloc_array(size, sizeof(spinlock_t), gfp); if (!tbl->locks) -- cgit v0.10.2 From eb8fc32354aa77678dc6e7950a8f0c79cace204f Mon Sep 17 00:00:00 2001 From: Vincent Date: Sun, 14 Aug 2016 15:38:29 +0200 Subject: mlxsw: spectrum_router: Fix use after free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In mlxsw_sp_router_fib4_add_info_destroy(), the fib_entry pointer is used after it has been freed by mlxsw_sp_fib_entry_destroy(). Use a temporary variable to fix this. Fixes: 61c503f976b5449e ("mlxsw: spectrum_router: Implement fib4 add/del switchdev obj ops") Signed-off-by: Vincent Stehlé Cc: Jiri Pirko Acked-by: Ido Schimmel Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 81418d6..90bb93b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1651,9 +1651,10 @@ static void mlxsw_sp_router_fib4_add_info_destroy(void const *data) const struct mlxsw_sp_router_fib4_add_info *info = data; struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry; struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp; + struct mlxsw_sp_vr *vr = fib_entry->vr; mlxsw_sp_fib_entry_destroy(fib_entry); - mlxsw_sp_vr_put(mlxsw_sp, fib_entry->vr); + mlxsw_sp_vr_put(mlxsw_sp, vr); kfree(info); } -- cgit v0.10.2 From 5a5a1d614287a647b36dff3f40c2b0ceabbc83ec Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 12 Aug 2016 01:05:08 +0300 Subject: USB: serial: mos7720: fix non-atomic allocation in write path There is an allocation with GFP_KERNEL flag in mos7720_write(), while it may be called from interrupt context. Follow-up for commit 191252837626 ("USB: kobil_sct: fix non-atomic allocation in write path") Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Cc: stable Signed-off-by: Johan Hovold diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 5608af4..de9992b 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -1252,7 +1252,7 @@ static int mos7720_write(struct tty_struct *tty, struct usb_serial_port *port, if (urb->transfer_buffer == NULL) { urb->transfer_buffer = kmalloc(URB_TRANSFER_BUFFER_SIZE, - GFP_KERNEL); + GFP_ATOMIC); if (!urb->transfer_buffer) goto exit; } -- cgit v0.10.2 From 3b7c7e52efda0d4640060de747768360ba70a7c0 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 12 Aug 2016 01:05:09 +0300 Subject: USB: serial: mos7840: fix non-atomic allocation in write path There is an allocation with GFP_KERNEL flag in mos7840_write(), while it may be called from interrupt context. Follow-up for commit 191252837626 ("USB: kobil_sct: fix non-atomic allocation in write path") Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Cc: stable Signed-off-by: Johan Hovold diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index ed378fb..57426d7 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -1340,8 +1340,8 @@ static int mos7840_write(struct tty_struct *tty, struct usb_serial_port *port, } if (urb->transfer_buffer == NULL) { - urb->transfer_buffer = - kmalloc(URB_TRANSFER_BUFFER_SIZE, GFP_KERNEL); + urb->transfer_buffer = kmalloc(URB_TRANSFER_BUFFER_SIZE, + GFP_ATOMIC); if (!urb->transfer_buffer) goto exit; } -- cgit v0.10.2 From d9853490176c88fff71d03ec9a174b77011f5026 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 28 Jul 2016 11:50:48 +0200 Subject: drm/etnaviv: take GPU lock later in the submit process Both the fence and event alloc are safe to be done without holding the GPU lock, as they either don't need any locking (fences) or are protected by their own lock (events). This solves a bad locking interaction between the submit path and the recover worker. If userspace manages to exhaust all available events while the GPU is hung, the submit will wait for events to become available holding the GPU lock. The recover worker waits for this lock to become available before trying to recover the GPU which frees up the allocated events. Essentially both paths are deadlocked until the submit path times out waiting for available events, failing the submit that could otherwise be handled just fine if the recover worker had the chance to bring the GPU back in a working state. Signed-off-by: Lucas Stach Reviewed-by: Christian Gmeiner diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 87ef341..b382cf5 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1333,8 +1333,6 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, if (ret < 0) return ret; - mutex_lock(&gpu->lock); - /* * TODO * @@ -1348,16 +1346,18 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, if (unlikely(event == ~0U)) { DRM_ERROR("no free event\n"); ret = -EBUSY; - goto out_unlock; + goto out_pm_put; } fence = etnaviv_gpu_fence_alloc(gpu); if (!fence) { event_free(gpu, event); ret = -ENOMEM; - goto out_unlock; + goto out_pm_put; } + mutex_lock(&gpu->lock); + gpu->event[event].fence = fence; submit->fence = fence->seqno; gpu->active_fence = submit->fence; @@ -1395,9 +1395,9 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, hangcheck_timer_reset(gpu); ret = 0; -out_unlock: mutex_unlock(&gpu->lock); +out_pm_put: etnaviv_gpu_pm_put(gpu); return ret; -- cgit v0.10.2 From 802934b2cfde463b72cc1b9bc1c081895a90be53 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 5 Aug 2016 12:29:06 -0400 Subject: dm round robin: do not use this_cpu_ptr() without having preemption disabled Use local_irq_save() to disable preemption before calling this_cpu_ptr(). Reported-by: Benjamin Block Fixes: b0b477c7e0dd ("dm round robin: use percpu 'repeat_count' and 'current_path'") Cc: stable@vger.kernel.org # 4.6+ Suggested-by: Jens Axboe Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c index 4ace1da..6c25213 100644 --- a/drivers/md/dm-round-robin.c +++ b/drivers/md/dm-round-robin.c @@ -210,14 +210,17 @@ static struct dm_path *rr_select_path(struct path_selector *ps, size_t nr_bytes) struct path_info *pi = NULL; struct dm_path *current_path = NULL; + local_irq_save(flags); current_path = *this_cpu_ptr(s->current_path); if (current_path) { percpu_counter_dec(&s->repeat_count); - if (percpu_counter_read_positive(&s->repeat_count) > 0) + if (percpu_counter_read_positive(&s->repeat_count) > 0) { + local_irq_restore(flags); return current_path; + } } - spin_lock_irqsave(&s->lock, flags); + spin_lock(&s->lock); if (!list_empty(&s->valid_paths)) { pi = list_entry(s->valid_paths.next, struct path_info, list); list_move_tail(&pi->list, &s->valid_paths); -- cgit v0.10.2 From 0a83df6c8cacafbefc5b56b2fbcb92b0d75b744b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 15 Jul 2016 17:30:20 -0400 Subject: dm crypt: increase mempool reserve to better support swapping Increase mempool size from 16 to 64 entries. This increase improves swap on dm-crypt performance. When swapping to dm-crypt, all available memory is temporarily exhausted and dm-crypt can only use the mempool reserve. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 4e9784b..eedba67 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -181,7 +181,7 @@ struct crypt_config { u8 key[0]; }; -#define MIN_IOS 16 +#define MIN_IOS 64 static void clone_init(struct dm_crypt_io *, struct bio *); static void kcryptd_queue_crypt(struct dm_crypt_io *io); -- cgit v0.10.2 From 7ef9153d9af5fe7ce32dcc0f558bfcfc3d2b3016 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 15 Aug 2016 15:17:41 +0200 Subject: misc: delete bh1780 driver The Rohm BH1780 ambient light sensor has a new driver with extended functionality (proper runtime PM) in the appropriate framework IIO, it can be found at: drivers/iio/light/bh1780.c The MISC driver symbol CONFIG_SENSORS_BH1780 does not appear in any defconfigs, so it should safe to delete. Cc: Hemanth V Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index a216b46..d002528 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -345,16 +345,6 @@ config SENSORS_TSL2550 This driver can also be built as a module. If so, the module will be called tsl2550. -config SENSORS_BH1780 - tristate "ROHM BH1780GLI ambient light sensor" - depends on I2C && SYSFS - help - If you say yes here you get support for the ROHM BH1780GLI - ambient light sensor. - - This driver can also be built as a module. If so, the module - will be called bh1780gli. - config SENSORS_BH1770 tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor" depends on I2C diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 7410c6d..fb32516 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_TIFM_CORE) += tifm_core.o obj-$(CONFIG_TIFM_7XX1) += tifm_7xx1.o obj-$(CONFIG_PHANTOM) += phantom.o obj-$(CONFIG_QCOM_COINCELL) += qcom-coincell.o -obj-$(CONFIG_SENSORS_BH1780) += bh1780gli.o obj-$(CONFIG_SENSORS_BH1770) += bh1770glc.o obj-$(CONFIG_SENSORS_APDS990X) += apds990x.o obj-$(CONFIG_SGI_IOC4) += ioc4.o diff --git a/drivers/misc/bh1780gli.c b/drivers/misc/bh1780gli.c deleted file mode 100644 index 7f90ce5..0000000 --- a/drivers/misc/bh1780gli.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * bh1780gli.c - * ROHM Ambient Light Sensor Driver - * - * Copyright (C) 2010 Texas Instruments - * Author: Hemanth V - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ -#include -#include -#include -#include -#include -#include -#include - -#define BH1780_REG_CONTROL 0x80 -#define BH1780_REG_PARTID 0x8A -#define BH1780_REG_MANFID 0x8B -#define BH1780_REG_DLOW 0x8C -#define BH1780_REG_DHIGH 0x8D - -#define BH1780_REVMASK (0xf) -#define BH1780_POWMASK (0x3) -#define BH1780_POFF (0x0) -#define BH1780_PON (0x3) - -/* power on settling time in ms */ -#define BH1780_PON_DELAY 2 - -struct bh1780_data { - struct i2c_client *client; - int power_state; - /* lock for sysfs operations */ - struct mutex lock; -}; - -static int bh1780_write(struct bh1780_data *ddata, u8 reg, u8 val, char *msg) -{ - int ret = i2c_smbus_write_byte_data(ddata->client, reg, val); - if (ret < 0) - dev_err(&ddata->client->dev, - "i2c_smbus_write_byte_data failed error %d Register (%s)\n", - ret, msg); - return ret; -} - -static int bh1780_read(struct bh1780_data *ddata, u8 reg, char *msg) -{ - int ret = i2c_smbus_read_byte_data(ddata->client, reg); - if (ret < 0) - dev_err(&ddata->client->dev, - "i2c_smbus_read_byte_data failed error %d Register (%s)\n", - ret, msg); - return ret; -} - -static ssize_t bh1780_show_lux(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct platform_device *pdev = to_platform_device(dev); - struct bh1780_data *ddata = platform_get_drvdata(pdev); - int lsb, msb; - - lsb = bh1780_read(ddata, BH1780_REG_DLOW, "DLOW"); - if (lsb < 0) - return lsb; - - msb = bh1780_read(ddata, BH1780_REG_DHIGH, "DHIGH"); - if (msb < 0) - return msb; - - return sprintf(buf, "%d\n", (msb << 8) | lsb); -} - -static ssize_t bh1780_show_power_state(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct platform_device *pdev = to_platform_device(dev); - struct bh1780_data *ddata = platform_get_drvdata(pdev); - int state; - - state = bh1780_read(ddata, BH1780_REG_CONTROL, "CONTROL"); - if (state < 0) - return state; - - return sprintf(buf, "%d\n", state & BH1780_POWMASK); -} - -static ssize_t bh1780_store_power_state(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct platform_device *pdev = to_platform_device(dev); - struct bh1780_data *ddata = platform_get_drvdata(pdev); - unsigned long val; - int error; - - error = kstrtoul(buf, 0, &val); - if (error) - return error; - - if (val < BH1780_POFF || val > BH1780_PON) - return -EINVAL; - - mutex_lock(&ddata->lock); - - error = bh1780_write(ddata, BH1780_REG_CONTROL, val, "CONTROL"); - if (error < 0) { - mutex_unlock(&ddata->lock); - return error; - } - - msleep(BH1780_PON_DELAY); - ddata->power_state = val; - mutex_unlock(&ddata->lock); - - return count; -} - -static DEVICE_ATTR(lux, S_IRUGO, bh1780_show_lux, NULL); - -static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO, - bh1780_show_power_state, bh1780_store_power_state); - -static struct attribute *bh1780_attributes[] = { - &dev_attr_power_state.attr, - &dev_attr_lux.attr, - NULL -}; - -static const struct attribute_group bh1780_attr_group = { - .attrs = bh1780_attributes, -}; - -static int bh1780_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ - int ret; - struct bh1780_data *ddata; - struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); - - if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) - return -EIO; - - ddata = devm_kzalloc(&client->dev, sizeof(struct bh1780_data), - GFP_KERNEL); - if (ddata == NULL) - return -ENOMEM; - - ddata->client = client; - i2c_set_clientdata(client, ddata); - - ret = bh1780_read(ddata, BH1780_REG_PARTID, "PART ID"); - if (ret < 0) - return ret; - - dev_info(&client->dev, "Ambient Light Sensor, Rev : %d\n", - (ret & BH1780_REVMASK)); - - mutex_init(&ddata->lock); - - return sysfs_create_group(&client->dev.kobj, &bh1780_attr_group); -} - -static int bh1780_remove(struct i2c_client *client) -{ - sysfs_remove_group(&client->dev.kobj, &bh1780_attr_group); - - return 0; -} - -#ifdef CONFIG_PM_SLEEP -static int bh1780_suspend(struct device *dev) -{ - struct bh1780_data *ddata; - int state, ret; - struct i2c_client *client = to_i2c_client(dev); - - ddata = i2c_get_clientdata(client); - state = bh1780_read(ddata, BH1780_REG_CONTROL, "CONTROL"); - if (state < 0) - return state; - - ddata->power_state = state & BH1780_POWMASK; - - ret = bh1780_write(ddata, BH1780_REG_CONTROL, BH1780_POFF, - "CONTROL"); - - if (ret < 0) - return ret; - - return 0; -} - -static int bh1780_resume(struct device *dev) -{ - struct bh1780_data *ddata; - int state, ret; - struct i2c_client *client = to_i2c_client(dev); - - ddata = i2c_get_clientdata(client); - state = ddata->power_state; - ret = bh1780_write(ddata, BH1780_REG_CONTROL, state, - "CONTROL"); - - if (ret < 0) - return ret; - - return 0; -} -#endif /* CONFIG_PM_SLEEP */ - -static SIMPLE_DEV_PM_OPS(bh1780_pm, bh1780_suspend, bh1780_resume); - -static const struct i2c_device_id bh1780_id[] = { - { "bh1780", 0 }, - { }, -}; - -MODULE_DEVICE_TABLE(i2c, bh1780_id); - -#ifdef CONFIG_OF -static const struct of_device_id of_bh1780_match[] = { - { .compatible = "rohm,bh1780gli", }, - {}, -}; - -MODULE_DEVICE_TABLE(of, of_bh1780_match); -#endif - -static struct i2c_driver bh1780_driver = { - .probe = bh1780_probe, - .remove = bh1780_remove, - .id_table = bh1780_id, - .driver = { - .name = "bh1780", - .pm = &bh1780_pm, - .of_match_table = of_match_ptr(of_bh1780_match), - }, -}; - -module_i2c_driver(bh1780_driver); - -MODULE_DESCRIPTION("BH1780GLI Ambient Light Sensor Driver"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Hemanth V "); -- cgit v0.10.2 From add125054b8727103631dce116361668436ef6a7 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Fri, 12 Aug 2016 00:52:56 -0700 Subject: cdc-acm: fix wrong pipe type on rx interrupt xfers This fixes the "BOGUS urb xfer" warning logged by usb_submit_urb(). Signed-off-by: Gavin Li Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 7191230..0f3f62e 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1354,7 +1354,6 @@ made_compressed_probe: spin_lock_init(&acm->write_lock); spin_lock_init(&acm->read_lock); mutex_init(&acm->mutex); - acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress); acm->is_int_ep = usb_endpoint_xfer_int(epread); if (acm->is_int_ep) acm->bInterval = epread->bInterval; @@ -1394,14 +1393,14 @@ made_compressed_probe: urb->transfer_dma = rb->dma; if (acm->is_int_ep) { usb_fill_int_urb(urb, acm->dev, - acm->rx_endpoint, + usb_rcvintpipe(usb_dev, epread->bEndpointAddress), rb->base, acm->readsize, acm_read_bulk_callback, rb, acm->bInterval); } else { usb_fill_bulk_urb(urb, acm->dev, - acm->rx_endpoint, + usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress), rb->base, acm->readsize, acm_read_bulk_callback, rb); diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index 05ce308..1f1eabf 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -96,7 +96,6 @@ struct acm { struct acm_rb read_buffers[ACM_NR]; struct acm_wb *putbuffer; /* for acm_tty_put_char() */ int rx_buflimit; - int rx_endpoint; spinlock_t read_lock; int write_used; /* number of non-empty write buffers */ int transmitting; -- cgit v0.10.2 From bb9947c3a14e781eb0f137728e7e55ec8d848991 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 14 Jul 2016 11:06:11 +0000 Subject: iio: pressure: bmp280: fix wrong pointer passed to PTR_ERR() PTR_ERR should access the value just tested by IS_ERR, otherwise the wrong error code will be returned. Signed-off-by: Wei Yongjun Reviewed-by: Linus Walleij Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index 6943688..94e27b2 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -970,7 +970,7 @@ int bmp280_common_probe(struct device *dev, data->vdda = devm_regulator_get(dev, "vdda"); if (IS_ERR(data->vdda)) { dev_err(dev, "failed to get VDDA regulator\n"); - ret = PTR_ERR(data->vddd); + ret = PTR_ERR(data->vdda); goto out_disable_vddd; } ret = regulator_enable(data->vdda); -- cgit v0.10.2 From 776b645315d372e06e46167194605d308aa1a790 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Thu, 21 Jul 2016 11:23:13 -0400 Subject: staging: iio: ad5933: Return correct value for AD5933_OUT_RANGE. The 'break' statement after calling ad5933_cmd only breaks out of the 'for' loop, which then unconditionally sets the return value to -EINVAL. Move the initialisation of 'ret' so we return the correct value. Signed-off-by: Phil Turnbull Signed-off-by: Jonathan Cameron diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index 170ac98..24c348d 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -419,6 +419,7 @@ static ssize_t ad5933_store(struct device *dev, mutex_lock(&indio_dev->mlock); switch ((u32)this_attr->address) { case AD5933_OUT_RANGE: + ret = -EINVAL; for (i = 0; i < 4; i++) if (val == st->range_avail[i]) { st->ctrl_hb &= ~AD5933_CTRL_RANGE(0x3); @@ -426,7 +427,6 @@ static ssize_t ad5933_store(struct device *dev, ret = ad5933_cmd(st, 0); break; } - ret = -EINVAL; break; case AD5933_IN_PGA_GAIN: if (sysfs_streq(buf, "1")) { -- cgit v0.10.2 From b2f0c09664b72b2f8c581383a9337ac3092e42c8 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 11 Jul 2016 13:50:01 +0200 Subject: iio: sw-trigger: Fix config group initialization Use the IS_ENABLED() helper macro to ensure that the configfs group is initialized either when configfs is built-in or when configfs is built as a module. Otherwise software trigger creation will result in undefined behaviour when configfs is built as a mdoule since the configfs group for the trigger is not properly initialized. Fixes: b662f809d410 ("iio: core: Introduce IIO software triggers") Signed-off-by: Lars-Peter Clausen Acked-by: Daniel Baluta Cc: Signed-off-by: Jonathan Cameron diff --git a/include/linux/iio/sw_trigger.h b/include/linux/iio/sw_trigger.h index 5198f8e..c97eab6 100644 --- a/include/linux/iio/sw_trigger.h +++ b/include/linux/iio/sw_trigger.h @@ -62,7 +62,7 @@ void iio_swt_group_init_type_name(struct iio_sw_trigger *t, const char *name, struct config_item_type *type) { -#ifdef CONFIG_CONFIGFS_FS +#if IS_ENABLED(CONFIG_CONFIGFS_FS) config_group_init_type_name(&t->group, name, type); #endif } -- cgit v0.10.2 From 7d3cc21dab5313a02f2f3ca8164529b828a030d1 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 11 Jul 2016 13:54:17 +0200 Subject: iio: ad799x: Fix buffered capture for ad7991/ad7995/ad7999 The data buffer for captured mode for the ad799x driver is allocated in the update_scan_mode() callback. This callback is not set in the iio_info struct for the ad7791/ad7995/ad7999, which means that the data buffer is not allocated when a captured transfer is started. As a result the driver crashes when the first sample is received. To fix this properly set the update_scan_mode() callback. Fixes: d8dca33027c1 ("staging:iio:ad799x: Preallocate sample buffer") Signed-off-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/adc/ad799x.c b/drivers/iio/adc/ad799x.c index b616376..9704090 100644 --- a/drivers/iio/adc/ad799x.c +++ b/drivers/iio/adc/ad799x.c @@ -527,6 +527,7 @@ static struct attribute_group ad799x_event_attrs_group = { static const struct iio_info ad7991_info = { .read_raw = &ad799x_read_raw, .driver_module = THIS_MODULE, + .update_scan_mode = ad799x_update_scan_mode, }; static const struct iio_info ad7993_4_7_8_noirq_info = { -- cgit v0.10.2 From b234f683dde97aebb7009c6aecab651d32ad70b4 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 11 Jul 2016 08:25:07 -0700 Subject: iio: accel: bma220_spi: set up buffer timestamps for non-zero values Use the iio_pollfunc_store_time parameter during triggered buffer set-up to get valid timestamps. Signed-off-by: Alison Schofield Cc: Daniel Baluta Reviewed-By: Tiberiu Breana Cc: Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/accel/bma220_spi.c b/drivers/iio/accel/bma220_spi.c index 1098d10..5099f29 100644 --- a/drivers/iio/accel/bma220_spi.c +++ b/drivers/iio/accel/bma220_spi.c @@ -253,7 +253,7 @@ static int bma220_probe(struct spi_device *spi) if (ret < 0) return ret; - ret = iio_triggered_buffer_setup(indio_dev, NULL, + ret = iio_triggered_buffer_setup(indio_dev, iio_pollfunc_store_time, bma220_trigger_handler, NULL); if (ret < 0) { dev_err(&spi->dev, "iio triggered buffer setup failed\n"); -- cgit v0.10.2 From 3c68858df7c2f0c4c343bb4702733fe827491f9e Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 11 Jul 2016 08:26:13 -0700 Subject: iio: humidity: am2315: set up buffer timestamps for non-zero values Use the iio_pollfunc_store_time parameter during triggered buffer set-up to get valid timestamps. Signed-off-by: Alison Schofield Cc: Daniel Baluta Reviewed-By: Tiberiu Breana Cc: Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/humidity/am2315.c b/drivers/iio/humidity/am2315.c index 3e200f6..ff96b6d 100644 --- a/drivers/iio/humidity/am2315.c +++ b/drivers/iio/humidity/am2315.c @@ -244,7 +244,7 @@ static int am2315_probe(struct i2c_client *client, indio_dev->channels = am2315_channels; indio_dev->num_channels = ARRAY_SIZE(am2315_channels); - ret = iio_triggered_buffer_setup(indio_dev, NULL, + ret = iio_triggered_buffer_setup(indio_dev, iio_pollfunc_store_time, am2315_trigger_handler, NULL); if (ret < 0) { dev_err(&client->dev, "iio triggered buffer setup failed\n"); -- cgit v0.10.2 From f8adf645db03345af2d9a8b6095b02327ea50885 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 11 Jul 2016 08:26:56 -0700 Subject: iio: proximity: as3935: set up buffer timestamps for non-zero values Use the iio_pollfunc_store_time parameter during triggered buffer set-up to get valid timestamps. Signed-off-by: Alison Schofield Cc: Daniel Baluta Cc: Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c index 2e3a70e..5656deb 100644 --- a/drivers/iio/proximity/as3935.c +++ b/drivers/iio/proximity/as3935.c @@ -397,7 +397,7 @@ static int as3935_probe(struct spi_device *spi) return ret; } - ret = iio_triggered_buffer_setup(indio_dev, NULL, + ret = iio_triggered_buffer_setup(indio_dev, iio_pollfunc_store_time, &as3935_trigger_handler, NULL); if (ret) { -- cgit v0.10.2 From 45e98152850c36560484f3fa3bb857a4bfe1a419 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Tue, 19 Jul 2016 12:25:00 -0400 Subject: iio: stx104: Unregister IIO device on remove callback The devm_iio_device_register function should not be used if custom operations must be performed in the remove callback. This patch replaces the dem_iio_device_register call with a iio_device_register call and respective iio_device_unregister call in the remove callback. Fixes: 765550e4d98d ("iio: stx104: Add GPIO support for the Apex Embedded Systems STX104") Signed-off-by: William Breathitt Gray Cc: Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/dac/stx104.c b/drivers/iio/dac/stx104.c index 792a971..bebbd00 100644 --- a/drivers/iio/dac/stx104.c +++ b/drivers/iio/dac/stx104.c @@ -65,6 +65,16 @@ struct stx104_gpio { unsigned int out_state; }; +/** + * struct stx104_dev - STX104 device private data structure + * @indio_dev: IIO device + * @chip: instance of the gpio_chip + */ +struct stx104_dev { + struct iio_dev *indio_dev; + struct gpio_chip *chip; +}; + static int stx104_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val, int *val2, long mask) { @@ -107,6 +117,7 @@ static const struct iio_chan_spec stx104_channels[STX104_NUM_CHAN] = { static int stx104_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) { + /* GPIO 0-3 are input only, while the rest are output only */ if (offset < 4) return 1; @@ -169,6 +180,7 @@ static int stx104_probe(struct device *dev, unsigned int id) struct iio_dev *indio_dev; struct stx104_iio *priv; struct stx104_gpio *stx104gpio; + struct stx104_dev *stx104dev; int err; indio_dev = devm_iio_device_alloc(dev, sizeof(*priv)); @@ -179,6 +191,10 @@ static int stx104_probe(struct device *dev, unsigned int id) if (!stx104gpio) return -ENOMEM; + stx104dev = devm_kzalloc(dev, sizeof(*stx104dev), GFP_KERNEL); + if (!stx104dev) + return -ENOMEM; + if (!devm_request_region(dev, base[id], STX104_EXTENT, dev_name(dev))) { dev_err(dev, "Unable to lock port addresses (0x%X-0x%X)\n", @@ -199,12 +215,6 @@ static int stx104_probe(struct device *dev, unsigned int id) outw(0, base[id] + 4); outw(0, base[id] + 6); - err = devm_iio_device_register(dev, indio_dev); - if (err) { - dev_err(dev, "IIO device registering failed (%d)\n", err); - return err; - } - stx104gpio->chip.label = dev_name(dev); stx104gpio->chip.parent = dev; stx104gpio->chip.owner = THIS_MODULE; @@ -220,7 +230,9 @@ static int stx104_probe(struct device *dev, unsigned int id) spin_lock_init(&stx104gpio->lock); - dev_set_drvdata(dev, stx104gpio); + stx104dev->indio_dev = indio_dev; + stx104dev->chip = &stx104gpio->chip; + dev_set_drvdata(dev, stx104dev); err = gpiochip_add_data(&stx104gpio->chip, stx104gpio); if (err) { @@ -228,14 +240,22 @@ static int stx104_probe(struct device *dev, unsigned int id) return err; } + err = iio_device_register(indio_dev); + if (err) { + dev_err(dev, "IIO device registering failed (%d)\n", err); + gpiochip_remove(&stx104gpio->chip); + return err; + } + return 0; } static int stx104_remove(struct device *dev, unsigned int id) { - struct stx104_gpio *const stx104gpio = dev_get_drvdata(dev); + struct stx104_dev *const stx104dev = dev_get_drvdata(dev); - gpiochip_remove(&stx104gpio->chip); + iio_device_unregister(stx104dev->indio_dev); + gpiochip_remove(stx104dev->chip); return 0; } -- cgit v0.10.2 From 193e2d4fd91c5e5d563395f9577621dac4f4df31 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 18 Jul 2016 17:56:43 -0700 Subject: iio: add Kconfig selects needed for triggered buffer compiles Select IIO_BUFFER and IIO_TRIGGERED_BUFFER to compile. Remove IIO_TRIGGER if present since IIO_BUFFER selects it. Signed-off-by: Alison Schofield Cc: Daniel Baluta Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index 89d7820..78f148e 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -20,6 +20,8 @@ config BMA180 config BMA220 tristate "Bosch BMA220 3-Axis Accelerometer Driver" depends on SPI + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say yes here to add support for the Bosch BMA220 triaxial acceleration sensor. @@ -234,7 +236,8 @@ config STK8312 config STK8BA50 tristate "Sensortek STK8BA50 3-Axis Accelerometer Driver" depends on I2C - depends on IIO_TRIGGER + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say yes here to get support for the Sensortek STK8BA50 3-axis accelerometer. diff --git a/drivers/iio/humidity/Kconfig b/drivers/iio/humidity/Kconfig index 738a86d..d041243 100644 --- a/drivers/iio/humidity/Kconfig +++ b/drivers/iio/humidity/Kconfig @@ -6,6 +6,8 @@ menu "Humidity sensors" config AM2315 tristate "Aosong AM2315 relative humidity and temperature sensor" depends on I2C + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help If you say yes here you get support for the Aosong AM2315 relative humidity and ambient temperature sensor. diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index 7c566f5..12ceb11 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -238,6 +238,8 @@ config MAX44000 tristate "MAX44000 Ambient and Infrared Proximity Sensor" depends on I2C select REGMAP_I2C + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say Y here if you want to build support for Maxim Integrated's MAX44000 ambient and infrared proximity sensor device. -- cgit v0.10.2 From 31f453eac56bdc41f434126bc2d5933b9fb720ec Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 27 Jul 2016 22:32:58 +0200 Subject: iio: pressure: bmp280: fix runtime suspend/resume crash In commit 3d838118c6aa ("iio: pressure: bmp280: add power management") For some reason the code in the runtime suspend/resume hooks got wrong (I suspect in the ambition to cut down boilerplate) and it seems it was tested without CONFIG_PM and crashes like so for me: Unable to handle kernel NULL pointer dereference at virtual address 0000000c pgd = c0204000 [0000000c] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: CPU: 1 PID: 89 Comm: kworker/1:2 Not tainted 4.7.0-03348-g90dc3680458a-dirty #99 Hardware name: Generic DT based system Workqueue: pm pm_runtime_work task: df3c6300 ti: dec8a000 task.ti: dec8a000 PC is at regulator_disable+0x0/0x6c LR is at bmp280_runtime_suspend+0x3c/0xa4 Dereferencing the BMP280 state container properly fixes the problem, sorry for screwing up. Fixes: 3d838118c6aa ("iio: pressure: bmp280: add power management") Signed-off-by: Linus Walleij Tested-by: Jarkko Nikula Cc: Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index 94e27b2..e5a533c 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -1079,7 +1079,8 @@ EXPORT_SYMBOL(bmp280_common_remove); #ifdef CONFIG_PM static int bmp280_runtime_suspend(struct device *dev) { - struct bmp280_data *data = dev_get_drvdata(dev); + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct bmp280_data *data = iio_priv(indio_dev); int ret; ret = regulator_disable(data->vdda); @@ -1090,7 +1091,8 @@ static int bmp280_runtime_suspend(struct device *dev) static int bmp280_runtime_resume(struct device *dev) { - struct bmp280_data *data = dev_get_drvdata(dev); + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct bmp280_data *data = iio_priv(indio_dev); int ret; ret = regulator_enable(data->vddd); -- cgit v0.10.2 From 7b142d8fd0bd4c9bf06ccb72ac4daedb503f0124 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 16 Jun 2016 00:45:33 +0200 Subject: android: binder: fix dangling pointer comparison If /dev/binder is opened and the opener process then e.g. calls execve, proc->vma_vm_mm will still point to the location of the now-freed mm_struct. If the process then calls ioctl(binder_fd, ...), the dangling proc->vma_vm_mm pointer will be compared to current->mm. Let the binder take a reference to the mm_struct to avoid this. v2: use the right refcounter Fixes: a906d6931f3c ("android: binder: Sanity check at binder ioctl") Signed-off-by: Jann Horn Reviewed-by: Chen Feng Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 16288e7..09fdb42 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2962,6 +2962,7 @@ static int binder_open(struct inode *nodp, struct file *filp) return -ENOMEM; get_task_struct(current); proc->tsk = current; + atomic_inc(¤t->mm->mm_count); proc->vma_vm_mm = current->mm; INIT_LIST_HEAD(&proc->todo); init_waitqueue_head(&proc->wait); @@ -3167,6 +3168,7 @@ static void binder_deferred_release(struct binder_proc *proc) vfree(proc->buffer); } + mmdrop(proc->vma_vm_mm); put_task_struct(proc->tsk); binder_debug(BINDER_DEBUG_OPEN_CLOSE, -- cgit v0.10.2 From f6b6a28e2dbc401416ff12f775d75281c9b41918 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 29 Jul 2016 16:15:18 -0300 Subject: nvme: Prevent controller state invalid transition Acquiring the nvme_ctrl lock before reading ctrl->state in nvme_change_ctrl_state() should prevent a theoretical invalid state transition, in the event of two threads racing inside that function. I haven't been able to observe this happening with the current code, and the current state machine seems to be simple enough to not be affected by these invalid transitions, but future modifications could make it more likely to happen. Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Sagi Grimberg Reviewed-by: Steve Wise Signed-off-by: Jens Axboe diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7ff2e82..7f75d66 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -81,10 +81,12 @@ EXPORT_SYMBOL_GPL(nvme_cancel_request); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state) { - enum nvme_ctrl_state old_state = ctrl->state; + enum nvme_ctrl_state old_state; bool changed = false; spin_lock_irq(&ctrl->lock); + + old_state = ctrl->state; switch (new_state) { case NVME_CTRL_LIVE: switch (old_state) { @@ -140,11 +142,12 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, default: break; } - spin_unlock_irq(&ctrl->lock); if (changed) ctrl->state = new_state; + spin_unlock_irq(&ctrl->lock); + return changed; } EXPORT_SYMBOL_GPL(nvme_change_ctrl_state); -- cgit v0.10.2 From 0d9dcf852334b796bacc7020364afba3122db81e Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 8 Aug 2016 11:14:36 -0700 Subject: iio: humidity: hdc100x: fix sensor data reads of temp and humidity Replace the i2c_smbus_read_byte commmands used to retrieve the sensor data with an i2c_master_recv command. The smbus read byte method fails because the device does not expect a stop condition after sending the first byte. When we issue the second read, we are getting the first byte again. Net effect is that of the 14 bits used for the measurement, the 8 most significant bits are correct, the lower 6 are not. None of the smbus read protocols follow the pattern this device requires (S Addr Rd [A] Data [A] Data NA P), hence the switch to an i2c receive transaction. Applicable from original introduction of this driver, but will require backporting due to churn in the code. Signed-off-by: Alison Schofield Cc: Daniel Baluta Cc: Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c index a03832a..e0c9c70 100644 --- a/drivers/iio/humidity/hdc100x.c +++ b/drivers/iio/humidity/hdc100x.c @@ -142,7 +142,7 @@ static int hdc100x_get_measurement(struct hdc100x_data *data, struct i2c_client *client = data->client; int delay = data->adc_int_us[chan->address]; int ret; - int val; + __be16 val; /* start measurement */ ret = i2c_smbus_write_byte(client, chan->address); @@ -154,26 +154,13 @@ static int hdc100x_get_measurement(struct hdc100x_data *data, /* wait for integration time to pass */ usleep_range(delay, delay + 1000); - /* - * i2c_smbus_read_word_data cannot() be used here due to the command - * value not being understood and causes NAKs preventing any reading - * from being accessed. - */ - ret = i2c_smbus_read_byte(client); + /* read measurement */ + ret = i2c_master_recv(data->client, (char *)&val, sizeof(val)); if (ret < 0) { - dev_err(&client->dev, "cannot read high byte measurement"); + dev_err(&client->dev, "cannot read sensor data\n"); return ret; } - val = ret << 8; - - ret = i2c_smbus_read_byte(client); - if (ret < 0) { - dev_err(&client->dev, "cannot read low byte measurement"); - return ret; - } - val |= ret; - - return val; + return be16_to_cpu(val); } static int hdc100x_get_heater_status(struct hdc100x_data *data) @@ -272,8 +259,8 @@ static int hdc100x_probe(struct i2c_client *client, struct iio_dev *indio_dev; struct hdc100x_data *data; - if (!i2c_check_functionality(client->adapter, - I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_BYTE)) + if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WORD_DATA | + I2C_FUNC_SMBUS_BYTE | I2C_FUNC_I2C)) return -EOPNOTSUPP; indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data)); -- cgit v0.10.2 From ddbc719f99cf9aed6918cef98cb3475fd4fc4fa6 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 26 Jul 2016 20:17:43 -0700 Subject: tools: iio: iio_generic_buffer: initialize channel array pointer Uninitialized channel pointer causes segmentation fault when we call free(channel) during cleanup() with no channels initialized. This happens when you exit early for usage errors. Initialize the pointer to NULL when it is declared. Signed-off-by: Alison Schofield Cc: Daniel Baluta Tested-by: Gregor Boirie Signed-off-by: Jonathan Cameron diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c index 0e8a1f7..ae68bf0 100644 --- a/tools/iio/iio_generic_buffer.c +++ b/tools/iio/iio_generic_buffer.c @@ -348,7 +348,7 @@ int main(int argc, char **argv) int notrigger = 0; char *dummy; - struct iio_channel_info *channels; + struct iio_channel_info *channels = NULL; register_cleanup(); -- cgit v0.10.2 From c2ab447454d498e709d9011c0f2d2945ee321f9b Mon Sep 17 00:00:00 2001 From: Anders Darander Date: Mon, 8 Aug 2016 14:42:16 +0200 Subject: iio: adc: at91: unbreak channel adc channel 3 The driver always assumes that an input device has been created when reading channel 3. This causes a kernel panic when dereferencing st->ts_input. The change was introduced in commit 84882b060301 ("iio: adc: at91_adc: Add support for touchscreens without TSMR"). Earlier versions only entered that part of the if-else statement if only the following flags are set: AT91_ADC_IER_XRDY | AT91_ADC_IER_YRDY | AT91_ADC_IER_PRDY Signed-off-by: Anders Darander Acked-by: Alexandre Belloni Cc: Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 52430ba..0438c68 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -381,8 +381,8 @@ static irqreturn_t at91_adc_rl_interrupt(int irq, void *private) st->ts_bufferedmeasure = false; input_report_key(st->ts_input, BTN_TOUCH, 0); input_sync(st->ts_input); - } else if (status & AT91_ADC_EOC(3)) { - /* Conversion finished */ + } else if (status & AT91_ADC_EOC(3) && st->ts_input) { + /* Conversion finished and we've a touchscreen */ if (st->ts_bufferedmeasure) { /* * Last measurement is always discarded, since it can -- cgit v0.10.2 From 34276bb062b8449b3b0a208c9b848a1a27920075 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 15 Aug 2016 14:58:43 +0200 Subject: of: fix reference counting in of_graph_get_endpoint_by_regs The called of_graph_get_next_endpoint() already decrements the refcount of the prev node, so it is wrong to do it again in the calling function. Use the for_each_endpoint_of_node() helper to interate through the endpoint OF nodes, which already does the right thing and simplifies the code a bit. Fixes: 8ccd0d0ca041 (of: add helper for getting endpoint node of specific identifiers) Cc: stable@vger.kernel.org Reported-by: David Jander Signed-off-by: Lucas Stach Acked-by: Philipp Zabel Signed-off-by: Rob Herring diff --git a/drivers/of/base.c b/drivers/of/base.c index cb255a0..3ce6953 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -2342,20 +2342,13 @@ struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg) { struct of_endpoint endpoint; - struct device_node *node, *prev_node = NULL; - - while (1) { - node = of_graph_get_next_endpoint(parent, prev_node); - of_node_put(prev_node); - if (!node) - break; + struct device_node *node = NULL; + for_each_endpoint_of_node(parent, node) { of_graph_parse_endpoint(node, &endpoint); if (((port_reg == -1) || (endpoint.port == port_reg)) && ((reg == -1) || (endpoint.id == reg))) return node; - - prev_node = node; } return NULL; -- cgit v0.10.2 From 99f1c013194e64d4b67d5d318148303b0e1585e1 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Thu, 14 Jul 2016 23:40:21 -0400 Subject: staging/lustre/llite: Close atomic_open race with several openers Right now, if it's an open of a negative dentry, a race is possible with several openers who all try to instantiate/rehash the same dentry and would hit a BUG_ON in d_add. But in fact if we got a negative dentry in atomic_open, that means we just revalidated it so no point in talking to MDS at all, just return ENOENT and make the race go away completely. Signed-off-by: Oleg Drokin Cc: stable # 4.7+ Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c index 3664bfd..2c4dc69 100644 --- a/drivers/staging/lustre/lustre/llite/namei.c +++ b/drivers/staging/lustre/lustre/llite/namei.c @@ -388,6 +388,7 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request, struct inode *inode = NULL; __u64 bits = 0; int rc = 0; + struct dentry *alias; /* NB 1 request reference will be taken away by ll_intent_lock() * when I return @@ -412,26 +413,12 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request, */ } - /* Only hash *de if it is unhashed (new dentry). - * Atoimc_open may passing hashed dentries for open. - */ - if (d_unhashed(*de)) { - struct dentry *alias; - - alias = ll_splice_alias(inode, *de); - if (IS_ERR(alias)) { - rc = PTR_ERR(alias); - goto out; - } - *de = alias; - } else if (!it_disposition(it, DISP_LOOKUP_NEG) && - !it_disposition(it, DISP_OPEN_CREATE)) { - /* With DISP_OPEN_CREATE dentry will be - * instantiated in ll_create_it. - */ - LASSERT(!d_inode(*de)); - d_instantiate(*de, inode); + alias = ll_splice_alias(inode, *de); + if (IS_ERR(alias)) { + rc = PTR_ERR(alias); + goto out; } + *de = alias; if (!it_disposition(it, DISP_LOOKUP_NEG)) { /* we have lookup look - unhide dentry */ @@ -587,6 +574,24 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode, *opened); + /* Only negative dentries enter here */ + LASSERT(!d_inode(dentry)); + + if (!d_in_lookup(dentry)) { + /* A valid negative dentry that just passed revalidation, + * there's little point to try and open it server-side, + * even though there's a minuscle chance it might succeed. + * Either way it's a valid race to just return -ENOENT here. + */ + if (!(open_flags & O_CREAT)) + return -ENOENT; + + /* Otherwise we just unhash it to be rehashed afresh via + * lookup if necessary + */ + d_drop(dentry); + } + it = kzalloc(sizeof(*it), GFP_NOFS); if (!it) return -ENOMEM; -- cgit v0.10.2 From 12311959ecf8a3a64676c01b62ce67a0c5f0fd49 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 12 Aug 2016 20:10:44 +0200 Subject: rhashtable: fix shift by 64 when shrinking I got this: ================================================================================ UBSAN: Undefined behaviour in ./include/linux/log2.h:63:13 shift exponent 64 is too large for 64-bit type 'long unsigned int' CPU: 1 PID: 721 Comm: kworker/1:1 Not tainted 4.8.0-rc1+ #87 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 Workqueue: events rht_deferred_worker 0000000000000000 ffff88011661f8d8 ffffffff82344f50 0000000041b58ab3 ffffffff84f98000 ffffffff82344ea4 ffff88011661f900 ffff88011661f8b0 0000000000000001 ffff88011661f6b8 dffffc0000000000 ffffffff867f7640 Call Trace: [] dump_stack+0xac/0xfc [] ? _atomic_dec_and_lock+0xc4/0xc4 [] ubsan_epilogue+0xd/0x8a [] __ubsan_handle_shift_out_of_bounds+0x255/0x29a [] ? __ubsan_handle_out_of_bounds+0x180/0x180 [] ? nl80211_req_set_reg+0x256/0x2f0 [] ? print_context_stack+0x8a/0x160 [] ? amd_pmu_reset+0x341/0x380 [] rht_deferred_worker+0x1618/0x1790 [] ? rht_deferred_worker+0x1618/0x1790 [] ? rhashtable_jhash2+0x370/0x370 [] ? process_one_work+0x6fd/0x1970 [] process_one_work+0x79f/0x1970 [] ? process_one_work+0x6fd/0x1970 [] ? try_to_grab_pending+0x4c0/0x4c0 [] ? worker_thread+0x1c4/0x1340 [] worker_thread+0x55f/0x1340 [] ? __schedule+0x4df/0x1d40 [] ? process_one_work+0x1970/0x1970 [] ? process_one_work+0x1970/0x1970 [] kthread+0x237/0x390 [] ? __kthread_parkme+0x280/0x280 [] ? _raw_spin_unlock_irq+0x33/0x50 [] ret_from_fork+0x1f/0x40 [] ? __kthread_parkme+0x280/0x280 ================================================================================ roundup_pow_of_two() is undefined when called with an argument of 0, so let's avoid the call and just fall back to ht->p.min_size (which should never be smaller than HASH_MIN_SIZE). Cc: Herbert Xu Signed-off-by: Vegard Nossum Acked-by: Herbert Xu Signed-off-by: David S. Miller diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 42acd81..5ba520b 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -324,12 +324,14 @@ static int rhashtable_expand(struct rhashtable *ht) static int rhashtable_shrink(struct rhashtable *ht) { struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); - unsigned int size; + unsigned int nelems = atomic_read(&ht->nelems); + unsigned int size = 0; int err; ASSERT_RHT_MUTEX(ht); - size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2); + if (nelems) + size = roundup_pow_of_two(nelems * 3 / 2); if (size < ht->p.min_size) size = ht->p.min_size; -- cgit v0.10.2 From 5e457896986e16c440c97bb94b9ccd95dd157292 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Sat, 13 Aug 2016 01:13:38 +0900 Subject: net: ipv6: Fix ping to link-local addresses. ping_v6_sendmsg does not set flowi6_oif in response to sin6_scope_id or sk_bound_dev_if, so it is not possible to use these APIs to ping an IPv6 address on a different interface. Instead, it sets flowi6_iif, which is incorrect but harmless. Stop setting flowi6_iif, and support various ways of setting oif in the same priority order used by udpv6_sendmsg. Tested: https://android-review.googlesource.com/#/c/254470/ Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index fed40d1..0900352 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -55,7 +55,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct icmp6hdr user_icmph; int addr_type; struct in6_addr *daddr; - int iif = 0; + int oif = 0; struct flowi6 fl6; int err; struct dst_entry *dst; @@ -78,25 +78,30 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (u->sin6_family != AF_INET6) { return -EAFNOSUPPORT; } - if (sk->sk_bound_dev_if && - sk->sk_bound_dev_if != u->sin6_scope_id) { - return -EINVAL; - } daddr = &(u->sin6_addr); - iif = u->sin6_scope_id; + if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr))) + oif = u->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; daddr = &sk->sk_v6_daddr; } - if (!iif) - iif = sk->sk_bound_dev_if; + if (!oif) + oif = sk->sk_bound_dev_if; + + if (!oif) + oif = np->sticky_pktinfo.ipi6_ifindex; + + if (!oif && ipv6_addr_is_multicast(daddr)) + oif = np->mcast_oif; + else if (!oif) + oif = np->ucast_oif; addr_type = ipv6_addr_type(daddr); - if (__ipv6_addr_needs_scope_id(addr_type) && !iif) - return -EINVAL; - if (addr_type & IPV6_ADDR_MAPPED) + if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) || + (addr_type & IPV6_ADDR_MAPPED) || + (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if)) return -EINVAL; /* TODO: use ip6_datagram_send_ctl to get options from cmsg */ @@ -106,16 +111,12 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.saddr = np->saddr; fl6.daddr = *daddr; + fl6.flowi6_oif = oif; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) - fl6.flowi6_oif = np->mcast_oif; - else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; - ipc6.tclass = np->tclass; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); -- cgit v0.10.2 From 3d7b33209201cbfa090d614db993571ca3c6b090 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 15 Aug 2016 13:06:24 +0200 Subject: gre: set inner_protocol on xmit Ensure that the inner_protocol is set on transmit so that GSO segmentation, which relies on that field, works correctly. This is achieved by setting the inner_protocol in gre_build_header rather than each caller of that function. It ensures that the inner_protocol is set when gre_fb_xmit() is used to transmit GRE which was not previously the case. I have observed this is not the case when OvS transmits GRE using lwtunnel metadata (which it always does). Fixes: 38720352412a ("gre: Use inner_proto to obtain inner header protocol") Cc: Pravin Shelar Acked-by: Alexander Duyck Signed-off-by: Simon Horman Acked-by: Pravin B Shelar Signed-off-by: David S. Miller diff --git a/include/net/gre.h b/include/net/gre.h index 7a54a31..73ea256 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -104,6 +104,7 @@ static inline void gre_build_header(struct sk_buff *skb, int hdr_len, skb_push(skb, hdr_len); + skb_set_inner_protocol(skb, proto); skb_reset_transport_header(skb); greh = (struct gre_base_hdr *)skb->data; greh->flags = gre_tnl_flags_to_gre_flags(flags); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5b1481b..113cc43 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -370,7 +370,6 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, tunnel->parms.o_flags, proto, tunnel->parms.o_key, htonl(tunnel->o_seqno)); - skb_set_inner_protocol(skb, proto); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 776d145..704274c 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -519,8 +519,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); - skb_set_inner_protocol(skb, protocol); - return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, NEXTHDR_GRE); } -- cgit v0.10.2 From f9a7da9130ef0143eb900794c7863dc5c9051fbc Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 15 Aug 2016 17:48:39 +0200 Subject: hv_netvsc: don't lose VF information struct netvsc_device is not suitable for storing VF information as this structure is being destroyed on MTU change / set channel operation (see rndis_filter_device_remove()). Move all VF related stuff to struct net_device_context which is persistent. Signed-off-by: Vitaly Kuznetsov Acked-by: Haiyang Zhang Signed-off-by: David S. Miller diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 467fb8b..3b3ecf2 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -647,7 +647,7 @@ struct netvsc_reconfig { struct garp_wrk { struct work_struct dwrk; struct net_device *netdev; - struct netvsc_device *netvsc_dev; + struct net_device_context *net_device_ctx; }; /* The context of the netvsc device */ @@ -678,6 +678,15 @@ struct net_device_context { /* the device is going away */ bool start_remove; + + /* State to manage the associated VF interface. */ + struct net_device *vf_netdev; + bool vf_inject; + atomic_t vf_use_cnt; + /* 1: allocated, serial number is valid. 0: not allocated */ + u32 vf_alloc; + /* Serial number of the VF to team with */ + u32 vf_serial; }; /* Per netvsc device */ @@ -733,15 +742,7 @@ struct netvsc_device { u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ u32 pkt_align; /* alignment bytes, e.g. 8 */ - /* 1: allocated, serial number is valid. 0: not allocated */ - u32 vf_alloc; - /* Serial number of the VF to team with */ - u32 vf_serial; atomic_t open_cnt; - /* State to manage the associated VF interface. */ - bool vf_inject; - struct net_device *vf_netdev; - atomic_t vf_use_cnt; }; static inline struct netvsc_device * diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 20e0917..410fb8e8 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -77,13 +77,9 @@ static struct netvsc_device *alloc_net_device(void) init_waitqueue_head(&net_device->wait_drain); net_device->destroy = false; atomic_set(&net_device->open_cnt, 0); - atomic_set(&net_device->vf_use_cnt, 0); net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; - net_device->vf_netdev = NULL; - net_device->vf_inject = false; - return net_device; } @@ -1106,16 +1102,16 @@ static void netvsc_send_table(struct hv_device *hdev, nvscdev->send_table[i] = tab[i]; } -static void netvsc_send_vf(struct netvsc_device *nvdev, +static void netvsc_send_vf(struct net_device_context *net_device_ctx, struct nvsp_message *nvmsg) { - nvdev->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated; - nvdev->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial; + net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated; + net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial; } static inline void netvsc_receive_inband(struct hv_device *hdev, - struct netvsc_device *nvdev, - struct nvsp_message *nvmsg) + struct net_device_context *net_device_ctx, + struct nvsp_message *nvmsg) { switch (nvmsg->hdr.msg_type) { case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE: @@ -1123,7 +1119,7 @@ static inline void netvsc_receive_inband(struct hv_device *hdev, break; case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION: - netvsc_send_vf(nvdev, nvmsg); + netvsc_send_vf(net_device_ctx, nvmsg); break; } } @@ -1136,6 +1132,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device, struct vmpacket_descriptor *desc) { struct nvsp_message *nvmsg; + struct net_device_context *net_device_ctx = netdev_priv(ndev); nvmsg = (struct nvsp_message *)((unsigned long) desc + (desc->offset8 << 3)); @@ -1150,7 +1147,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device, break; case VM_PKT_DATA_INBAND: - netvsc_receive_inband(device, net_device, nvmsg); + netvsc_receive_inband(device, net_device_ctx, nvmsg); break; default: diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 41bd952..794139b 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -658,20 +658,19 @@ int netvsc_recv_callback(struct hv_device *device_obj, struct sk_buff *skb; struct sk_buff *vf_skb; struct netvsc_stats *rx_stats; - struct netvsc_device *netvsc_dev = net_device_ctx->nvdev; u32 bytes_recvd = packet->total_data_buflen; int ret = 0; if (!net || net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; - if (READ_ONCE(netvsc_dev->vf_inject)) { - atomic_inc(&netvsc_dev->vf_use_cnt); - if (!READ_ONCE(netvsc_dev->vf_inject)) { + if (READ_ONCE(net_device_ctx->vf_inject)) { + atomic_inc(&net_device_ctx->vf_use_cnt); + if (!READ_ONCE(net_device_ctx->vf_inject)) { /* * We raced; just move on. */ - atomic_dec(&netvsc_dev->vf_use_cnt); + atomic_dec(&net_device_ctx->vf_use_cnt); goto vf_injection_done; } @@ -683,17 +682,19 @@ int netvsc_recv_callback(struct hv_device *device_obj, * the host). Deliver these via the VF interface * in the guest. */ - vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet, - csum_info, *data, vlan_tci); + vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev, + packet, csum_info, *data, + vlan_tci); if (vf_skb != NULL) { - ++netvsc_dev->vf_netdev->stats.rx_packets; - netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd; + ++net_device_ctx->vf_netdev->stats.rx_packets; + net_device_ctx->vf_netdev->stats.rx_bytes += + bytes_recvd; netif_receive_skb(vf_skb); } else { ++net->stats.rx_dropped; ret = NVSP_STAT_FAIL; } - atomic_dec(&netvsc_dev->vf_use_cnt); + atomic_dec(&net_device_ctx->vf_use_cnt); return ret; } @@ -1158,7 +1159,7 @@ static void netvsc_notify_peers(struct work_struct *wrk) netdev_notify_peers(gwrk->netdev); - atomic_dec(&gwrk->netvsc_dev->vf_use_cnt); + atomic_dec(&gwrk->net_device_ctx->vf_use_cnt); } static struct net_device *get_netvsc_net_device(char *mac) @@ -1211,7 +1212,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) * Take a reference on the module. */ try_module_get(THIS_MODULE); - netvsc_dev->vf_netdev = vf_netdev; + net_device_ctx->vf_netdev = vf_netdev; return NOTIFY_OK; } @@ -1233,11 +1234,11 @@ static int netvsc_vf_up(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL)) + if (!netvsc_dev || !net_device_ctx->vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF up: %s\n", vf_netdev->name); - netvsc_dev->vf_inject = true; + net_device_ctx->vf_inject = true; /* * Open the device before switching data path. @@ -1257,9 +1258,9 @@ static int netvsc_vf_up(struct net_device *vf_netdev) * notify peers; take a reference to prevent * the VF interface from vanishing. */ - atomic_inc(&netvsc_dev->vf_use_cnt); + atomic_inc(&net_device_ctx->vf_use_cnt); net_device_ctx->gwrk.netdev = vf_netdev; - net_device_ctx->gwrk.netvsc_dev = netvsc_dev; + net_device_ctx->gwrk.net_device_ctx = net_device_ctx; schedule_work(&net_device_ctx->gwrk.dwrk); return NOTIFY_OK; @@ -1283,17 +1284,17 @@ static int netvsc_vf_down(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL)) + if (!netvsc_dev || !net_device_ctx->vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF down: %s\n", vf_netdev->name); - netvsc_dev->vf_inject = false; + net_device_ctx->vf_inject = false; /* * Wait for currently active users to * drain out. */ - while (atomic_read(&netvsc_dev->vf_use_cnt) != 0) + while (atomic_read(&net_device_ctx->vf_use_cnt) != 0) udelay(50); netvsc_switch_datapath(ndev, false); netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name); @@ -1302,9 +1303,9 @@ static int netvsc_vf_down(struct net_device *vf_netdev) /* * Notify peers. */ - atomic_inc(&netvsc_dev->vf_use_cnt); + atomic_inc(&net_device_ctx->vf_use_cnt); net_device_ctx->gwrk.netdev = ndev; - net_device_ctx->gwrk.netvsc_dev = netvsc_dev; + net_device_ctx->gwrk.net_device_ctx = net_device_ctx; schedule_work(&net_device_ctx->gwrk.dwrk); return NOTIFY_OK; @@ -1331,7 +1332,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); - netvsc_dev->vf_netdev = NULL; + net_device_ctx->vf_netdev = NULL; module_put(THIS_MODULE); return NOTIFY_OK; } @@ -1382,6 +1383,10 @@ static int netvsc_probe(struct hv_device *dev, spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); + atomic_set(&net_device_ctx->vf_use_cnt, 0); + net_device_ctx->vf_netdev = NULL; + net_device_ctx->vf_inject = false; + net->netdev_ops = &device_ops; net->hw_features = NETVSC_HW_FEATURES; -- cgit v0.10.2 From d072218f214929194db06069564495b6b9fff34a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 15 Aug 2016 17:48:40 +0200 Subject: hv_netvsc: avoid deadlocks between rtnl lock and vf_use_cnt wait Here is a deadlock scenario: - netvsc_vf_up() schedules netvsc_notify_peers() work and quits. - netvsc_vf_down() runs before netvsc_notify_peers() gets executed. As it is being executed from netdev notifier chain we hold rtnl lock when we get here. - we enter while (atomic_read(&net_device_ctx->vf_use_cnt) != 0) loop and wait till netvsc_notify_peers() drops vf_use_cnt. - netvsc_notify_peers() starts on some other CPU but netdev_notify_peers() will hang on rtnl_lock(). - deadlock! Instead of introducing additional synchronization I suggest we drop gwrk.dwrk completely and call NETDEV_NOTIFY_PEERS directly. As we're acting under rtnl lock this is legitimate. Signed-off-by: Vitaly Kuznetsov Acked-by: Haiyang Zhang Signed-off-by: David S. Miller diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 3b3ecf2..591af71 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -644,12 +644,6 @@ struct netvsc_reconfig { u32 event; }; -struct garp_wrk { - struct work_struct dwrk; - struct net_device *netdev; - struct net_device_context *net_device_ctx; -}; - /* The context of the netvsc device */ struct net_device_context { /* point back to our device context */ @@ -667,7 +661,6 @@ struct net_device_context { struct work_struct work; u32 msg_enable; /* debug level */ - struct garp_wrk gwrk; struct netvsc_stats __percpu *tx_stats; struct netvsc_stats __percpu *rx_stats; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 794139b..70317fa 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1151,17 +1151,6 @@ static void netvsc_free_netdev(struct net_device *netdev) free_netdev(netdev); } -static void netvsc_notify_peers(struct work_struct *wrk) -{ - struct garp_wrk *gwrk; - - gwrk = container_of(wrk, struct garp_wrk, dwrk); - - netdev_notify_peers(gwrk->netdev); - - atomic_dec(&gwrk->net_device_ctx->vf_use_cnt); -} - static struct net_device *get_netvsc_net_device(char *mac) { struct net_device *dev, *found = NULL; @@ -1253,15 +1242,8 @@ static int netvsc_vf_up(struct net_device *vf_netdev) netif_carrier_off(ndev); - /* - * Now notify peers. We are scheduling work to - * notify peers; take a reference to prevent - * the VF interface from vanishing. - */ - atomic_inc(&net_device_ctx->vf_use_cnt); - net_device_ctx->gwrk.netdev = vf_netdev; - net_device_ctx->gwrk.net_device_ctx = net_device_ctx; - schedule_work(&net_device_ctx->gwrk.dwrk); + /* Now notify peers through VF device. */ + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev); return NOTIFY_OK; } @@ -1300,13 +1282,9 @@ static int netvsc_vf_down(struct net_device *vf_netdev) netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name); rndis_filter_close(netvsc_dev); netif_carrier_on(ndev); - /* - * Notify peers. - */ - atomic_inc(&net_device_ctx->vf_use_cnt); - net_device_ctx->gwrk.netdev = ndev; - net_device_ctx->gwrk.net_device_ctx = net_device_ctx; - schedule_work(&net_device_ctx->gwrk.dwrk); + + /* Now notify peers through netvsc device. */ + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev); return NOTIFY_OK; } @@ -1378,7 +1356,6 @@ static int netvsc_probe(struct hv_device *dev, INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); INIT_WORK(&net_device_ctx->work, do_set_multicast); - INIT_WORK(&net_device_ctx->gwrk.dwrk, netvsc_notify_peers); spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); -- cgit v0.10.2 From 57c1826b991244d2144eb6e3d5d1b13a53cbea63 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 15 Aug 2016 17:48:41 +0200 Subject: hv_netvsc: reset vf_inject on VF removal We reset vf_inject on VF going down (netvsc_vf_down()) but we don't on VF removal (netvsc_unregister_vf()) so vf_inject stays 'true' while vf_netdev is already NULL and we're trying to inject packets into NULL net device in netvsc_recv_callback() causing kernel to crash. Signed-off-by: Vitaly Kuznetsov Acked-by: Haiyang Zhang Signed-off-by: David S. Miller diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 70317fa..2c90883 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1205,6 +1205,19 @@ static int netvsc_register_vf(struct net_device *vf_netdev) return NOTIFY_OK; } +static void netvsc_inject_enable(struct net_device_context *net_device_ctx) +{ + net_device_ctx->vf_inject = true; +} + +static void netvsc_inject_disable(struct net_device_context *net_device_ctx) +{ + net_device_ctx->vf_inject = false; + + /* Wait for currently active users to drain out. */ + while (atomic_read(&net_device_ctx->vf_use_cnt) != 0) + udelay(50); +} static int netvsc_vf_up(struct net_device *vf_netdev) { @@ -1227,7 +1240,7 @@ static int netvsc_vf_up(struct net_device *vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF up: %s\n", vf_netdev->name); - net_device_ctx->vf_inject = true; + netvsc_inject_enable(net_device_ctx); /* * Open the device before switching data path. @@ -1270,14 +1283,7 @@ static int netvsc_vf_down(struct net_device *vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF down: %s\n", vf_netdev->name); - net_device_ctx->vf_inject = false; - /* - * Wait for currently active users to - * drain out. - */ - - while (atomic_read(&net_device_ctx->vf_use_cnt) != 0) - udelay(50); + netvsc_inject_disable(net_device_ctx); netvsc_switch_datapath(ndev, false); netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name); rndis_filter_close(netvsc_dev); @@ -1309,7 +1315,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) if (netvsc_dev == NULL) return NOTIFY_DONE; netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); - + netvsc_inject_disable(net_device_ctx); net_device_ctx->vf_netdev = NULL; module_put(THIS_MODULE); return NOTIFY_OK; -- cgit v0.10.2 From 0f20d795f78d182c4b743d880a5e8dc4d39892fe Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 15 Aug 2016 17:48:42 +0200 Subject: hv_netvsc: protect module refcount by checking net_device_ctx->vf_netdev We're not guaranteed to see NETDEV_REGISTER/NETDEV_UNREGISTER notifications only once per VF but we increase/decrease module refcount unconditionally. Check vf_netdev to make sure we don't take/release it twice. We presume that only one VF per netvsc device may exist. Signed-off-by: Vitaly Kuznetsov Acked-by: Haiyang Zhang Signed-off-by: David S. Miller diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 2c90883..62a4e6e 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1193,7 +1193,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (netvsc_dev == NULL) + if (!netvsc_dev || net_device_ctx->vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); @@ -1312,7 +1312,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (netvsc_dev == NULL) + if (!netvsc_dev || !net_device_ctx->vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); netvsc_inject_disable(net_device_ctx); -- cgit v0.10.2 From 0dbff144a1e7310e2f8b7a957352c4be9aeb38e4 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 15 Aug 2016 17:48:43 +0200 Subject: hv_netvsc: fix bonding devices check in netvsc_netdev_event() Bonding driver sets IFF_BONDING on both master (the bonding device) and slave (the real NIC) devices and in netvsc_netdev_event() we want to skip master devices only. Currently, there is an uncertainty when a slave interface is removed: if bonding module comes first in netdev_chain it clears IFF_BONDING flag on the netdev and netvsc_netdev_event() correctly handles NETDEV_UNREGISTER event, but in case netvsc comes first on the chain it sees the device with IFF_BONDING still attached and skips it. As we still hold vf_netdev pointer to the device we crash on the next inject. Signed-off-by: Vitaly Kuznetsov Acked-by: Haiyang Zhang Signed-off-by: David S. Miller diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 62a4e6e..3ba29fc 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1482,8 +1482,13 @@ static int netvsc_netdev_event(struct notifier_block *this, { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); - /* Avoid Vlan, Bonding dev with same MAC registering as VF */ - if (event_dev->priv_flags & (IFF_802_1Q_VLAN | IFF_BONDING)) + /* Avoid Vlan dev with same MAC registering as VF */ + if (event_dev->priv_flags & IFF_802_1Q_VLAN) + return NOTIFY_DONE; + + /* Avoid Bonding master dev with same MAC registering as VF */ + if (event_dev->priv_flags & IFF_BONDING && + event_dev->flags & IFF_MASTER) return NOTIFY_DONE; switch (event) { -- cgit v0.10.2 From d2fbdf76b85bcdfe57b8ef2ba09d20e8ada79abd Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sat, 23 Jul 2016 08:15:04 +0200 Subject: tipc: fix NULL pointer dereference in shutdown() tipc_msg_create() can return a NULL skb and if so, we shouldn't try to call tipc_node_xmit_skb() on it. general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 3 PID: 30298 Comm: trinity-c0 Not tainted 4.7.0-rc7+ #19 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 task: ffff8800baf09980 ti: ffff8800595b8000 task.ti: ffff8800595b8000 RIP: 0010:[] [] tipc_node_xmit_skb+0x6b/0x140 RSP: 0018:ffff8800595bfce8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000003023b0e0 RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffffffff83d12580 RBP: ffff8800595bfd78 R08: ffffed000b2b7f32 R09: 0000000000000000 R10: fffffbfff0759725 R11: 0000000000000000 R12: 1ffff1000b2b7f9f R13: ffff8800595bfd58 R14: ffffffff83d12580 R15: dffffc0000000000 FS: 00007fcdde242700(0000) GS:ffff88011af80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fcddde1db10 CR3: 000000006874b000 CR4: 00000000000006e0 DR0: 00007fcdde248000 DR1: 00007fcddd73d000 DR2: 00007fcdde248000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000090602 Stack: 0000000000000018 0000000000000018 0000000041b58ab3 ffffffff83954208 ffffffff830bb400 ffff8800595bfd30 ffffffff8309d767 0000000000000018 0000000000000018 ffff8800595bfd78 ffffffff8309da1a 00000000810ee611 Call Trace: [] tipc_shutdown+0x553/0x880 [] SyS_shutdown+0x14b/0x170 [] do_syscall_64+0x19c/0x410 [] entry_SYSCALL64_slow_path+0x25/0x25 Code: 90 00 b4 0b 83 c7 00 f1 f1 f1 f1 4c 8d 6d e0 c7 40 04 00 00 00 f4 c7 40 08 f3 f3 f3 f3 48 89 d8 48 c1 e8 03 c7 45 b4 00 00 00 00 <80> 3c 30 00 75 78 48 8d 7b 08 49 8d 75 c0 48 b8 00 00 00 00 00 RIP [] tipc_node_xmit_skb+0x6b/0x140 RSP ---[ end trace 57b0484e351e71f1 ]--- I feel like we should maybe return -ENOMEM or -ENOBUFS, but I'm not sure userspace is equipped to handle that. Anyway, this is better than a GPF and looks somewhat consistent with other tipc_msg_create() callers. Signed-off-by: Vegard Nossum Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c49b8df..f9f5f3c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2180,7 +2180,8 @@ restart: TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, onode, dport, oport, TIPC_CONN_SHUTDOWN); - tipc_node_xmit_skb(net, skb, dnode, tsk->portid); + if (skb) + tipc_node_xmit_skb(net, skb, dnode, tsk->portid); } tsk->connected = 0; sock->state = SS_DISCONNECTING; -- cgit v0.10.2 From d9ae449b3d14d7c55f69af329972f175d180e68d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 3 Aug 2016 18:03:44 +0200 Subject: KVM: arm64: vgic-its: Make updates to propbaser/pendbaser atomic There are two problems with the current implementation of the MMIO handlers for the propbaser and pendbaser: First, the write to the value itself is not guaranteed to be an atomic 64-bit write so two concurrent writes to the structure field could be intermixed. Second, because we do a read-modify-update operation without any synchronization, if we have two 32-bit accesses to separate parts of the register, we can loose one of them. By using the atomic cmpxchg64 we should cover both issues above. Reviewed-by: Andre Przywara Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index ff668e0..90d8181 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -306,16 +306,19 @@ static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu, { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - u64 propbaser = dist->propbaser; + u64 old_propbaser, propbaser; /* Storing a value with LPIs already enabled is undefined */ if (vgic_cpu->lpis_enabled) return; - propbaser = update_64bit_reg(propbaser, addr & 4, len, val); - propbaser = vgic_sanitise_propbaser(propbaser); - - dist->propbaser = propbaser; + do { + old_propbaser = dist->propbaser; + propbaser = old_propbaser; + propbaser = update_64bit_reg(propbaser, addr & 4, len, val); + propbaser = vgic_sanitise_propbaser(propbaser); + } while (cmpxchg64(&dist->propbaser, old_propbaser, + propbaser) != old_propbaser); } static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu, @@ -331,16 +334,19 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, unsigned long val) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - u64 pendbaser = vgic_cpu->pendbaser; + u64 old_pendbaser, pendbaser; /* Storing a value with LPIs already enabled is undefined */ if (vgic_cpu->lpis_enabled) return; - pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val); - pendbaser = vgic_sanitise_pendbaser(pendbaser); - - vgic_cpu->pendbaser = pendbaser; + do { + old_pendbaser = vgic_cpu->pendbaser; + pendbaser = old_pendbaser; + pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val); + pendbaser = vgic_sanitise_pendbaser(pendbaser); + } while (cmpxchg64(&vgic_cpu->pendbaser, old_pendbaser, + pendbaser) != old_pendbaser); } /* -- cgit v0.10.2 From c7735769d5dd79afb07254532fabd9ccbd85b1fa Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 8 Aug 2016 16:45:43 +0100 Subject: KVM: arm64: ITS: move ITS registration into first VCPU run Currently we register an ITS device upon userland issuing the CTLR_INIT ioctl to mark initialization of the ITS as done. This deviates from the initialization sequence of the existing GIC devices and does not play well with the way QEMU handles things. To be more in line with what we are used to, register the ITS(es) just before the first VCPU is about to run, so in the map_resources() call. This involves iterating through the list of KVM devices and map each ITS that we find. Signed-off-by: Andre Przywara Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index d06330a..1cf9f59 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1319,13 +1319,13 @@ void vgic_enable_lpis(struct kvm_vcpu *vcpu) its_sync_lpi_pending_table(vcpu); } -static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its) +static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its) { struct vgic_io_device *iodev = &its->iodev; int ret; - if (its->initialized) - return 0; + if (!its->initialized) + return -EBUSY; if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) return -ENXIO; @@ -1342,9 +1342,6 @@ static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its) KVM_VGIC_V3_ITS_SIZE, &iodev->dev); mutex_unlock(&kvm->slots_lock); - if (!ret) - its->initialized = true; - return ret; } @@ -1466,9 +1463,6 @@ static int vgic_its_set_attr(struct kvm_device *dev, if (type != KVM_VGIC_ITS_ADDR_TYPE) return -ENODEV; - if (its->initialized) - return -EBUSY; - if (copy_from_user(&addr, uaddr, sizeof(addr))) return -EFAULT; @@ -1484,7 +1478,9 @@ static int vgic_its_set_attr(struct kvm_device *dev, case KVM_DEV_ARM_VGIC_GRP_CTRL: switch (attr->attr) { case KVM_DEV_ARM_VGIC_CTRL_INIT: - return vgic_its_init_its(dev->kvm, its); + its->initialized = true; + + return 0; } break; } @@ -1529,3 +1525,30 @@ int kvm_vgic_register_its_device(void) return kvm_register_device_ops(&kvm_arm_vgic_its_ops, KVM_DEV_TYPE_ARM_VGIC_ITS); } + +/* + * Registers all ITSes with the kvm_io_bus framework. + * To follow the existing VGIC initialization sequence, this has to be + * done as late as possible, just before the first VCPU runs. + */ +int vgic_register_its_iodevs(struct kvm *kvm) +{ + struct kvm_device *dev; + int ret = 0; + + list_for_each_entry(dev, &kvm->devices, vm_node) { + if (dev->ops != &kvm_arm_vgic_its_ops) + continue; + + ret = vgic_register_its_iodev(kvm, dev->private); + if (ret) + return ret; + /* + * We don't need to care about tearing down previously + * registered ITSes, as the kvm_io_bus framework removes + * them for us if the VM gets destroyed. + */ + } + + return ret; +} diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 0506543..9f0dae3 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -289,6 +289,14 @@ int vgic_v3_map_resources(struct kvm *kvm) goto out; } + if (vgic_has_its(kvm)) { + ret = vgic_register_its_iodevs(kvm); + if (ret) { + kvm_err("Unable to register VGIC ITS MMIO regions\n"); + goto out; + } + } + dist->ready = true; out: diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 1d8e21d..6c4625c 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -84,6 +84,7 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu); int vgic_v3_probe(const struct gic_kvm_info *info); int vgic_v3_map_resources(struct kvm *kvm); int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address); +int vgic_register_its_iodevs(struct kvm *kvm); bool vgic_has_its(struct kvm *kvm); int kvm_vgic_register_its_device(void); void vgic_enable_lpis(struct kvm_vcpu *vcpu); @@ -140,6 +141,11 @@ static inline int vgic_register_redist_iodevs(struct kvm *kvm, return -ENODEV; } +static inline int vgic_register_its_iodevs(struct kvm *kvm) +{ + return -ENODEV; +} + static inline bool vgic_has_its(struct kvm *kvm) { return false; -- cgit v0.10.2 From 505a19eec49ab36b314a05bc062749ebdfb0aa90 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 9 Aug 2016 10:54:29 +0100 Subject: KVM: arm64: check for ITS device on MSI injection When userspace provides the doorbell address for an MSI to be injected into the guest, we find a KVM device which feels responsible. Lets check that this device is really an emulated ITS before we make real use of the container_of-ed pointer. [ Moved NULL-pointer check to caller of static function - Christoffer ] Signed-off-by: Andre Przywara Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 1cf9f59..9533080 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -471,6 +471,21 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, return 0; } +static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev) +{ + struct vgic_io_device *iodev; + + if (dev->ops != &kvm_io_gic_ops) + return NULL; + + iodev = container_of(dev, struct vgic_io_device, dev); + + if (iodev->iodev_type != IODEV_ITS) + return NULL; + + return iodev; +} + /* * Queries the KVM IO bus framework to get the ITS pointer from the given * doorbell address. @@ -494,9 +509,11 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); if (!kvm_io_dev) - return -ENODEV; + return -EINVAL; - iodev = container_of(kvm_io_dev, struct vgic_io_device, dev); + iodev = vgic_get_its_iodev(kvm_io_dev); + if (!iodev) + return -EINVAL; mutex_lock(&iodev->its->its_lock); ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data); -- cgit v0.10.2 From 33e7664a0af6e9a516f01014f39737aaa119b6d9 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 26 Jul 2016 14:49:04 +0000 Subject: power_supply: tps65217-charger: fix missing platform_set_drvdata() Add missing platform_set_drvdata() in tps65217_charger_probe(), otherwise calling platform_get_drvdata() in remove returns NULL. This is detected by Coccinelle semantic patch. Fixes: 3636859b280c ("power_supply: Add support for tps65217-charger") Signed-off-by: Wei Yongjun Signed-off-by: Sebastian Reichel diff --git a/drivers/power/tps65217_charger.c b/drivers/power/tps65217_charger.c index 73dfae4..4c56e54 100644 --- a/drivers/power/tps65217_charger.c +++ b/drivers/power/tps65217_charger.c @@ -206,6 +206,7 @@ static int tps65217_charger_probe(struct platform_device *pdev) if (!charger) return -ENOMEM; + platform_set_drvdata(pdev, charger); charger->tps = tps; charger->dev = &pdev->dev; -- cgit v0.10.2 From 5d87f493ddb1b86a0569fa3c4037fa9efc0c7183 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 14 Aug 2016 04:07:32 +0200 Subject: x86/power/64: Use __pa() for physical address computation The value of temp_level4_pgt is the physical address of the top-level page directory, so use __pa() to compute it. Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index a3e3ccc..9634557 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -113,7 +113,7 @@ static int set_up_temporary_mappings(void) return result; } - temp_level4_pgt = (unsigned long)pgd - __PAGE_OFFSET; + temp_level4_pgt = __pa(pgd); return 0; } -- cgit v0.10.2 From 8ca7f4fe0733342c862b8585dd6eb6521b9bf533 Mon Sep 17 00:00:00 2001 From: "sean.wang@mediatek.com" Date: Tue, 16 Aug 2016 13:55:13 +0800 Subject: net: ethernet: mediatek: fix RMII mode and add REVMII supported by GMAC The patch fixes up the incorrect setup of reduced MII (RMII) on GMAC and adds the supplement for the setup of reverse MII (REVMII) on GMAC , and rearranges the error handling for invalid PHY argument. Signed-off-by: Sean Wang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 0030361..bd0ea05 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -245,12 +245,16 @@ static int mtk_phy_connect(struct mtk_mac *mac) case PHY_INTERFACE_MODE_MII: ge_mode = 1; break; - case PHY_INTERFACE_MODE_RMII: + case PHY_INTERFACE_MODE_REVMII: ge_mode = 2; break; + case PHY_INTERFACE_MODE_RMII: + if (!mac->id) + goto err_phy; + ge_mode = 3; + break; default: - dev_err(eth->dev, "invalid phy_mode\n"); - return -1; + goto err_phy; } /* put the gmac into the right mode */ @@ -272,6 +276,11 @@ static int mtk_phy_connect(struct mtk_mac *mac) of_node_put(np); return 0; + +err_phy: + of_node_put(np); + dev_err(eth->dev, "invalid phy_mode\n"); + return -EINVAL; } static int mtk_mdio_init(struct mtk_eth *eth) -- cgit v0.10.2 From b2025c7cc92d5bfc8c5ce756c8d8a6f57c776fbd Mon Sep 17 00:00:00 2001 From: "sean.wang@mediatek.com" Date: Tue, 16 Aug 2016 13:55:14 +0800 Subject: net: ethernet: mediatek: fix flow control settings on GMAC0 is not being enabled properly Commit 08ef55c6f257acf3bdc6940813f80e8f0f5d90ec ("net-next: mediatek: fix gigabit and flow control advertisement") had supported proper flow control settings for GMAC1. But for GMAC0, 1.GMAC0 shares the common logic with GMAC1 inside mtk_phy_link_adjust() to adapt various settings for the target phy. 2.GMAC0 uses fixed-phy to connect to a builtin gigabit switch with fixed link speed as commit 0c72c50f6f93b0c3daa9ea35d89ab3a933c7b5a0 ("net-next: mediatek: add fixed-phy support") describes. 3.However, fixed-phy doesn't enable SUPPORTED_Pause & SUPPORTED_Asym_Pause supported flag on default that would cause mtk_phy_link_adjust() not to enable flow control setting on GMAC0 properly and cause packet dropped when high traffic. Due to these reasons, the patch adds SUPPORTED_Pause & SUPPORTED_Asym_Pause supported flags on fixed-phy used by the driver to have proper handling on the both GMAC with the shared common logic. Signed-off-by: Sean Wang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index bd0ea05..9901527 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -267,6 +267,11 @@ static int mtk_phy_connect(struct mtk_mac *mac) mac->phy_dev->autoneg = AUTONEG_ENABLE; mac->phy_dev->speed = 0; mac->phy_dev->duplex = 0; + + if (of_phy_is_fixed_link(mac->of_node)) + mac->phy_dev->supported |= + SUPPORTED_Pause | SUPPORTED_Asym_Pause; + mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause; mac->phy_dev->advertising = mac->phy_dev->supported | -- cgit v0.10.2 From 55a4e778191cfcf675aa1f9716edb71a3014d5fb Mon Sep 17 00:00:00 2001 From: "sean.wang@mediatek.com" Date: Tue, 16 Aug 2016 13:55:15 +0800 Subject: net: ethernet: mediatek: fix runtime warning raised by inconsistent struct device pointers passed to DMA API Runtime warning occurs if DMA-API debug feature is enabled that would be raised by pointers passed to DMA API as arguments to inconsistent struct device objects, so that the patch makes them usage aligned between DMA operations such as dma_map_*() and dma_unmap_*() to eliminate the warning. Signed-off-by: Sean Wang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 9901527..f160954 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -558,15 +558,15 @@ static inline struct mtk_tx_buf *mtk_desc_to_tx_buf(struct mtk_tx_ring *ring, return &ring->buf[idx]; } -static void mtk_tx_unmap(struct device *dev, struct mtk_tx_buf *tx_buf) +static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf) { if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) { - dma_unmap_single(dev, + dma_unmap_single(eth->dev, dma_unmap_addr(tx_buf, dma_addr0), dma_unmap_len(tx_buf, dma_len0), DMA_TO_DEVICE); } else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) { - dma_unmap_page(dev, + dma_unmap_page(eth->dev, dma_unmap_addr(tx_buf, dma_addr0), dma_unmap_len(tx_buf, dma_len0), DMA_TO_DEVICE); @@ -611,9 +611,9 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, if (skb_vlan_tag_present(skb)) txd4 |= TX_DMA_INS_VLAN | skb_vlan_tag_get(skb); - mapped_addr = dma_map_single(&dev->dev, skb->data, + mapped_addr = dma_map_single(eth->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&dev->dev, mapped_addr))) + if (unlikely(dma_mapping_error(eth->dev, mapped_addr))) return -ENOMEM; WRITE_ONCE(itxd->txd1, mapped_addr); @@ -639,10 +639,10 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, n_desc++; frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN); - mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset, + mapped_addr = skb_frag_dma_map(eth->dev, frag, offset, frag_map_size, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&dev->dev, mapped_addr))) + if (unlikely(dma_mapping_error(eth->dev, mapped_addr))) goto err_dma; if (i == nr_frags - 1 && @@ -695,7 +695,7 @@ err_dma: tx_buf = mtk_desc_to_tx_buf(ring, itxd); /* unmap dma */ - mtk_tx_unmap(&dev->dev, tx_buf); + mtk_tx_unmap(eth, tx_buf); itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU; itxd = mtk_qdma_phys_to_virt(ring, itxd->txd2); @@ -852,11 +852,11 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, netdev->stats.rx_dropped++; goto release_desc; } - dma_addr = dma_map_single(ð->netdev[mac]->dev, + dma_addr = dma_map_single(eth->dev, new_data + NET_SKB_PAD, ring->buf_size, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) { + if (unlikely(dma_mapping_error(eth->dev, dma_addr))) { skb_free_frag(new_data); netdev->stats.rx_dropped++; goto release_desc; @@ -871,7 +871,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, } skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); - dma_unmap_single(&netdev->dev, trxd.rxd1, + dma_unmap_single(eth->dev, trxd.rxd1, ring->buf_size, DMA_FROM_DEVICE); pktlen = RX_DMA_GET_PLEN0(trxd.rxd2); skb->dev = netdev; @@ -953,7 +953,7 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget) done[mac]++; budget--; } - mtk_tx_unmap(eth->dev, tx_buf); + mtk_tx_unmap(eth, tx_buf); ring->last_free = desc; atomic_inc(&ring->free_count); @@ -1108,7 +1108,7 @@ static void mtk_tx_clean(struct mtk_eth *eth) if (ring->buf) { for (i = 0; i < MTK_DMA_SIZE; i++) - mtk_tx_unmap(eth->dev, &ring->buf[i]); + mtk_tx_unmap(eth, &ring->buf[i]); kfree(ring->buf); ring->buf = NULL; } -- cgit v0.10.2 From 33be126510974e2eb9679f1ca9bca4f67ee4c4c7 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 16 Aug 2016 10:18:03 +0300 Subject: xhci: always handle "Command Ring Stopped" events Fix "Command completion event does not match command" errors by always handling the command ring stopped events. The command ring stopped event is generated as a result of aborting or stopping the command ring with a register write. It is not caused by a command in the command queue, and thus won't have a matching command in the comman list. Solve it by handling the command ring stopped event before checking for a matching command. In most command time out cases we abort the command ring, and get a command ring stopped event. The events command pointer will point at the current command ring dequeue, which in most cases matches the timed out command in the command list, and no error messages are seen. If we instead get a command aborted event before the command ring stopped event, the abort event will increse the command ring dequeue pointer, and the following command ring stopped events command pointer will point at the next, not yet queued command. This case triggered the error message Signed-off-by: Mathias Nyman CC: Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 918e0c7..fe5b70b 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1334,12 +1334,6 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, cmd = list_entry(xhci->cmd_list.next, struct xhci_command, cmd_list); - if (cmd->command_trb != xhci->cmd_ring->dequeue) { - xhci_err(xhci, - "Command completion event does not match command\n"); - return; - } - del_timer(&xhci->cmd_timer); trace_xhci_cmd_completion(cmd_trb, (struct xhci_generic_trb *) event); @@ -1351,6 +1345,13 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, xhci_handle_stopped_cmd_ring(xhci, cmd); return; } + + if (cmd->command_trb != xhci->cmd_ring->dequeue) { + xhci_err(xhci, + "Command completion event does not match command\n"); + return; + } + /* * Host aborted the command ring, check if the current command was * supposed to be aborted, otherwise continue normally. -- cgit v0.10.2 From 0d2daaded82565f807a4435d678343f437b8b848 Mon Sep 17 00:00:00 2001 From: Alban Browaeys Date: Tue, 16 Aug 2016 10:18:04 +0300 Subject: xhci: really enqueue zero length TRBs. Enqueue the first TRB even if full_len is zero. Without this "adb install " freezes the system. Signed-off-by: Alban Browaeys Fixes: 86065c2719a5 ("xhci: don't rely on precalculated value of needed trbs in the enqueue loop") Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index fe5b70b..fd9fd12 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3244,7 +3244,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, send_addr = addr; /* Queue the TRBs, even if they are zero-length */ - for (enqd_len = 0; enqd_len < full_len; enqd_len += trb_buff_len) { + for (enqd_len = 0; first_trb || enqd_len < full_len; + enqd_len += trb_buff_len) { field = TRB_TYPE(TRB_NORMAL); /* TRB buffer should not cross 64KB boundaries */ -- cgit v0.10.2 From 88716a93766b8f095cdef37a8e8f2c93aa233b21 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Tue, 16 Aug 2016 10:18:05 +0300 Subject: usb: xhci: Fix panic if disconnect After a device is disconnected, xhci_stop_device() will be invoked in xhci_bus_suspend(). Also the "disconnect" IRQ will have ISR to invoke xhci_free_virt_device() in this sequence. xhci_irq -> xhci_handle_event -> handle_cmd_completion -> xhci_handle_cmd_disable_slot -> xhci_free_virt_device If xhci->devs[slot_id] has been assigned to NULL in xhci_free_virt_device(), then virt_dev->eps[i].ring in xhci_stop_device() may point to an invlid address to cause kernel panic. virt_dev = xhci->devs[slot_id]; : if (virt_dev->eps[i].ring && virt_dev->eps[i].ring->dequeue) [] Unable to handle kernel paging request at virtual address 00001a68 [] pgd=ffffffc001430000 [] [00001a68] *pgd=000000013c807003, *pud=000000013c807003, *pmd=000000013c808003, *pte=0000000000000000 [] Internal error: Oops: 96000006 [#1] PREEMPT SMP [] CPU: 0 PID: 39 Comm: kworker/0:1 Tainted: G U [] Workqueue: pm pm_runtime_work [] task: ffffffc0bc0e0bc0 ti: ffffffc0bc0ec000 task.ti: ffffffc0bc0ec000 [] PC is at xhci_stop_device.constprop.11+0xb4/0x1a4 This issue is found when running with realtek ethernet device (0bda:8153). Signed-off-by: Jim Lin Cc: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index d61fcc4..730b9fd 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -386,6 +386,9 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend) ret = 0; virt_dev = xhci->devs[slot_id]; + if (!virt_dev) + return -ENODEV; + cmd = xhci_alloc_command(xhci, false, true, GFP_NOIO); if (!cmd) { xhci_dbg(xhci, "Couldn't allocate command structure.\n"); -- cgit v0.10.2 From f1f6d9a8b540df22b87a5bf6bc104edaade81f47 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 16 Aug 2016 10:18:06 +0300 Subject: xhci: don't dereference a xhci member after removing xhci Remove the hcd after checking for the xhci last quirks, not before. This caused a hang on a Alpine Ridge xhci based maching which remove the whole xhci controller when unplugging the last usb device CC: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 4fd041b..d7b0f97 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -314,11 +314,12 @@ static void xhci_pci_remove(struct pci_dev *dev) usb_remove_hcd(xhci->shared_hcd); usb_put_hcd(xhci->shared_hcd); } - usb_hcd_pci_remove(dev); /* Workaround for spurious wakeups at shutdown with HSW */ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) pci_set_power_state(dev, PCI_D3hot); + + usb_hcd_pci_remove(dev); } #ifdef CONFIG_PM -- cgit v0.10.2 From 83cf8df2d4fa48a80b384fea4b09b12180a2442e Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Tue, 16 Aug 2016 08:27:53 +0200 Subject: drivers/iio/light/Kconfig: SENSORS_BH1780 cleanup Commit 7ef9153d9af5 ("misc: delete bh1780 driver") has removed the Kconfig option SENSORS_BH1780. Remove the last reference on this option. Signed-off-by: Valentin Rothberg Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index 7c566f5..69904d5 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -76,7 +76,6 @@ config BH1750 config BH1780 tristate "ROHM BH1780 ambient light sensor" depends on I2C - depends on !SENSORS_BH1780 help Say Y here to build support for the ROHM BH1780GLI ambient light sensor. -- cgit v0.10.2 From 39bbee4e549fbc358b2ef9137c4bf459abd164fb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Aug 2016 09:24:39 +0100 Subject: nvme-rdma: initialize ret to zero to avoid returning garbage ret is not initialized so it contains garbage. Ensure garbage is not returned by initializing rc to 0. Signed-off-by: Colin Ian King Signed-off-by: Sagi Grimberg diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 8d2875b..9c69393 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1319,7 +1319,7 @@ out_destroy_queue_ib: static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) { struct nvme_rdma_ctrl *ctrl = queue->ctrl; - int ret; + int ret = 0; /* Own the controller deletion */ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) -- cgit v0.10.2 From 172b1d6b5a9337eb8c1ec294b80e448e03a9ac17 Mon Sep 17 00:00:00 2001 From: Xiaodong Liu Date: Fri, 12 Aug 2016 06:24:42 -0400 Subject: crypto: sha256-mb - fix ctx pointer and digest copy 1. fix ctx pointer Use req_ctx which is the ctx for the next job that have been completed in the lanes instead of the first completed job rctx, whose completion could have been called and released. 2. fix digest copy Use XMM register to copy another 16 bytes sha256 digest instead of a regular register. Signed-off-by: Xiaodong Liu Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c index 89fa85e..6f97fb3 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb.c +++ b/arch/x86/crypto/sha256-mb/sha256_mb.c @@ -485,10 +485,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, req = cast_mcryptd_ctx_to_req(req_ctx); if (irqs_disabled()) - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); else { local_bh_disable(); - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); local_bh_enable(); } } diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S index b691da9..a78a069 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S @@ -265,13 +265,14 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2) vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 - movl _args_digest+4*32(state, idx, 4), tmp2_w + vmovd _args_digest(state , idx, 4) , %xmm0 vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 - vmovdqu %xmm0, _result_digest(job_rax) - movl tmp2_w, _result_digest+1*16(job_rax) + vmovdqu %xmm0, _result_digest(job_rax) + offset = (_result_digest + 1*16) + vmovdqu %xmm1, offset(job_rax) pop %rbx -- cgit v0.10.2 From e67479b13ede47cc2f5beb5b51e67fdb30778ee8 Mon Sep 17 00:00:00 2001 From: Xiaodong Liu Date: Fri, 12 Aug 2016 06:28:31 -0400 Subject: crypto: sha512-mb - fix ctx pointer 1. fix ctx pointer Use req_ctx which is the ctx for the next job that have been completed in the lanes instead of the first completed job rctx, whose completion could have been called and released. Signed-off-by: Xiaodong Liu Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c index f4cf5b7..d210174 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb.c @@ -497,10 +497,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, req = cast_mcryptd_ctx_to_req(req_ctx); if (irqs_disabled()) - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); else { local_bh_disable(); - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); local_bh_enable(); } } -- cgit v0.10.2 From b0f12c61de013ecb0abe19d5e64bdab45989de5a Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 16 Aug 2016 11:24:46 +0100 Subject: ASoC: compress: Fix leak of a widget list in soc_compr_open_fe After we have called dpcm_path_get we should make sure to call dpcm_path_put on all error paths. This was not happening causing the allocated widget list to be leaked, this patch corrects this by adding a dpcm_path_put to the error path. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index d2df46c..bf7b52f 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -121,7 +121,7 @@ static int soc_compr_open_fe(struct snd_compr_stream *cstream) dpcm_be_disconnect(fe, stream); fe->dpcm[stream].runtime = NULL; - goto fe_err; + goto path_err; } dpcm_clear_pending_state(fe, stream); @@ -136,6 +136,8 @@ static int soc_compr_open_fe(struct snd_compr_stream *cstream) return 0; +path_err: + dpcm_path_put(&list); fe_err: if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->shutdown) fe->dai_link->compr_ops->shutdown(cstream); -- cgit v0.10.2 From 924d8696751c4b9e58263bc82efdafcf875596a6 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 16 Aug 2016 10:46:38 +0100 Subject: PM / hibernate: Fix rtree_next_node() to avoid walking off list ends rtree_next_node() walks the linked list of leaf nodes to find the next block of pages in the struct memory_bitmap. If it walks off the end of the list of nodes, it walks the list of memory zones to find the next region of memory. If it walks off the end of the list of zones, it returns false. This leaves the struct bm_position's node and zone pointers pointing at their respective struct list_heads in struct mem_zone_bm_rtree. memory_bm_find_bit() uses struct bm_position's node and zone pointers to avoid walking lists and trees if the next bit appears in the same node/zone. It handles these values being stale. Swap rtree_next_node()s 'step then test' to 'test-next then step', this means if we reach the end of memory we return false and leave the node and zone pointers as they were. This fixes a panic on resume using AMD Seattle with 64K pages: [ 6.868732] Freezing user space processes ... (elapsed 0.000 seconds) done. [ 6.875753] Double checking all user space processes after OOM killer disable... (elapsed 0.000 seconds) [ 6.896453] PM: Using 3 thread(s) for decompression. [ 6.896453] PM: Loading and decompressing image data (5339 pages)... [ 7.318890] PM: Image loading progress: 0% [ 7.323395] Unable to handle kernel paging request at virtual address 00800040 [ 7.330611] pgd = ffff000008df0000 [ 7.334003] [00800040] *pgd=00000083fffe0003, *pud=00000083fffe0003, *pmd=00000083fffd0003, *pte=0000000000000000 [ 7.344266] Internal error: Oops: 96000005 [#1] PREEMPT SMP [ 7.349825] Modules linked in: [ 7.352871] CPU: 2 PID: 1 Comm: swapper/0 Tainted: G W I 4.8.0-rc1 #4737 [ 7.360512] Hardware name: AMD Overdrive/Supercharger/Default string, BIOS ROD1002C 04/08/2016 [ 7.369109] task: ffff8003c0220000 task.stack: ffff8003c0280000 [ 7.375020] PC is at set_bit+0x18/0x30 [ 7.378758] LR is at memory_bm_set_bit+0x24/0x30 [ 7.383362] pc : [] lr : [] pstate: 60000045 [ 7.390743] sp : ffff8003c0283b00 [ 7.473551] [ 7.475031] Process swapper/0 (pid: 1, stack limit = 0xffff8003c0280020) [ 7.481718] Stack: (0xffff8003c0283b00 to 0xffff8003c0284000) [ 7.800075] Call trace: [ 7.887097] [] set_bit+0x18/0x30 [ 7.891876] [] duplicate_memory_bitmap.constprop.38+0x54/0x70 [ 7.899172] [] snapshot_write_next+0x22c/0x47c [ 7.905166] [] load_image_lzo+0x754/0xa88 [ 7.910725] [] swsusp_read+0x144/0x230 [ 7.916025] [] load_image_and_restore+0x58/0x90 [ 7.922105] [] software_resume+0x2f0/0x338 [ 7.927752] [] do_one_initcall+0x38/0x11c [ 7.933314] [] kernel_init_freeable+0x14c/0x1ec [ 7.939395] [] kernel_init+0x10/0xfc [ 7.944520] [] ret_from_fork+0x10/0x40 [ 7.949820] Code: d2800022 8b400c21 f9800031 9ac32043 (c85f7c22) [ 7.955909] ---[ end trace 0024a5986e6ff323 ]--- [ 7.960529] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b Here struct mem_zone_bm_rtree's start_pfn has been returned instead of struct rtree_node's addr as the node/zone pointers are corrupt after we walked off the end of the lists during mark_unsafe_pages(). This behaviour was exposed by commit 6dbecfd345a6 ("PM / hibernate: Simplify mark_unsafe_pages()"), which caused mark_unsafe_pages() to call duplicate_memory_bitmap(), which uses memory_bm_find_bit() after walking off the end of the memory bitmap. Fixes: 3a20cb177961 (PM / Hibernate: Implement position keeping in radix tree) Signed-off-by: James Morse [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index d90df92..2ba1869 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -835,9 +835,9 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) */ static bool rtree_next_node(struct memory_bitmap *bm) { - bm->cur.node = list_entry(bm->cur.node->list.next, - struct rtree_node, list); - if (&bm->cur.node->list != &bm->cur.zone->leaves) { + if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) { + bm->cur.node = list_entry(bm->cur.node->list.next, + struct rtree_node, list); bm->cur.node_pfn += BM_BITS_PER_BLOCK; bm->cur.node_bit = 0; touch_softlockup_watchdog(); @@ -845,9 +845,9 @@ static bool rtree_next_node(struct memory_bitmap *bm) } /* No more nodes, goto next zone */ - bm->cur.zone = list_entry(bm->cur.zone->list.next, + if (!list_is_last(&bm->cur.zone->list, &bm->zones)) { + bm->cur.zone = list_entry(bm->cur.zone->list.next, struct mem_zone_bm_rtree, list); - if (&bm->cur.zone->list != &bm->zones) { bm->cur.node = list_entry(bm->cur.zone->leaves.next, struct rtree_node, list); bm->cur.node_pfn = 0; -- cgit v0.10.2 From 3e103a65514c2947e53f3171b21255fbde8b60c6 Mon Sep 17 00:00:00 2001 From: Christoph Huber Date: Mon, 15 Aug 2016 18:59:25 +0200 Subject: ASoC: atmel_ssc_dai: Don't unconditionally reset SSC on stream startup commit cbaadf0f90d6 ("ASoC: atmel_ssc_dai: refactor the startup and shutdown") refactored code such that the SSC is reset on every startup; this breaks duplex audio (e.g. first start audio playback, then start record, causing the playback to stop/hang) Fixes: cbaadf0f90d6 (ASoC: atmel_ssc_dai: refactor the startup and shutdown) Signed-off-by: Christoph Huber Signed-off-by: Peter Meerwald-Stadler Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index 54c09ac..16e459a 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -299,8 +299,9 @@ static int atmel_ssc_startup(struct snd_pcm_substream *substream, clk_enable(ssc_p->ssc->clk); ssc_p->mck_rate = clk_get_rate(ssc_p->ssc->clk); - /* Reset the SSC to keep it at a clean status */ - ssc_writel(ssc_p->ssc->regs, CR, SSC_BIT(CR_SWRST)); + /* Reset the SSC unless initialized to keep it in a clean state */ + if (!ssc_p->initialized) + ssc_writel(ssc_p->ssc->regs, CR, SSC_BIT(CR_SWRST)); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { dir = 0; -- cgit v0.10.2 From 3256aaef5e9a851f6be47656868020726e102187 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20Stehl=C3=A9?= Date: Tue, 16 Aug 2016 15:11:25 +0200 Subject: nvmet-rdma: Fix use after free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid dereferencing the queue pointer in nvmet_rdma_release_queue_work() after it has been freed by nvmet_rdma_free_queue(). Fixes: d8f7750a08968b10 ("nvmet-rdma: Correctly handle RDMA device hot removal") Signed-off-by: Vincent Stehlé Cc: Sagi Grimberg Cc: Christoph Hellwig Signed-off-by: Sagi Grimberg diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index b4d6485..5de8d0a 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -978,10 +978,11 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w) container_of(w, struct nvmet_rdma_queue, release_work); struct rdma_cm_id *cm_id = queue->cm_id; struct nvmet_rdma_device *dev = queue->dev; + enum nvmet_rdma_queue_state state = queue->state; nvmet_rdma_free_queue(queue); - if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL) + if (state != NVMET_RDMA_IN_DEVICE_REMOVAL) rdma_destroy_id(cm_id); kref_put(&dev->ref, nvmet_rdma_free_dev); -- cgit v0.10.2 From cab0b8d50e9bbef62c04067072c953433a87a9ff Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 12 Aug 2016 19:25:21 -0400 Subject: drm/amdgpu: Change GART offset to 64-bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GART aperture size can be bigger than 4GB. Therefore the offset used in amdgpu_gart_bind and amdgpu_gart_unbind must be 64-bit. Reviewed-by: Christian König Signed-off-by: Felix Kuehling Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8ebc5f1..8c704c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -646,9 +646,9 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); int amdgpu_gart_init(struct amdgpu_device *adev); void amdgpu_gart_fini(struct amdgpu_device *adev); -void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset, +void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages); -int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset, +int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, struct page **pagelist, dma_addr_t *dma_addr, uint32_t flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 921bce2..0feea34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -221,7 +221,7 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev) * Unbinds the requested pages from the gart page table and * replaces them with the dummy page (all asics). */ -void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset, +void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages) { unsigned t; @@ -268,7 +268,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset, * (all asics). * Returns 0 for success, -EINVAL for failure. */ -int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset, +int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, struct page **pagelist, dma_addr_t *dma_addr, uint32_t flags) { -- cgit v0.10.2 From a956beda19a6b39fbc19d0aaf21947acdc18cf74 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Aug 2016 10:26:47 -0400 Subject: NFS: Allow the mount option retrans=0 We should allow retrans=0 as just meaning that every timeout is a major timeout, and that there is no increment in the timeout value. For instance, this means that we would allow TCP users to specify a flat timeout value of 60s, by specifying "timeo=600,retrans=0" in their mount option string. Siged-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 003ebce..1e10678 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -426,7 +426,7 @@ EXPORT_SYMBOL_GPL(nfs_mark_client_ready); * Initialise the timeout values for a connection */ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, - unsigned int timeo, unsigned int retrans) + int timeo, int retrans) { to->to_initval = timeo * HZ / 10; to->to_retries = retrans; @@ -434,9 +434,9 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, switch (proto) { case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_RDMA: - if (to->to_retries == 0) + if (retrans == NFS_UNSPEC_RETRANS) to->to_retries = NFS_DEF_TCP_RETRANS; - if (to->to_initval == 0) + if (timeo == NFS_UNSPEC_TIMEO || to->to_retries == 0) to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10; if (to->to_initval > NFS_MAX_TCP_TIMEOUT) to->to_initval = NFS_MAX_TCP_TIMEOUT; @@ -449,9 +449,9 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, to->to_exponential = 0; break; case XPRT_TRANSPORT_UDP: - if (to->to_retries == 0) + if (retrans == NFS_UNSPEC_RETRANS) to->to_retries = NFS_DEF_UDP_RETRANS; - if (!to->to_initval) + if (timeo == NFS_UNSPEC_TIMEO || to->to_initval == 0) to->to_initval = NFS_DEF_UDP_TIMEO * HZ / 10; if (to->to_initval > NFS_MAX_UDP_TIMEOUT) to->to_initval = NFS_MAX_UDP_TIMEOUT; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7ce5e02..74935a1 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -58,6 +58,9 @@ struct nfs_clone_mount { */ #define NFS_UNSPEC_PORT (-1) +#define NFS_UNSPEC_RETRANS (UINT_MAX) +#define NFS_UNSPEC_TIMEO (UINT_MAX) + /* * Maximum number of pages that readdir can use for creating * a vmapped array of pages. @@ -156,7 +159,7 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *, int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); void nfs_server_insert_lists(struct nfs_server *); void nfs_server_remove_lists(struct nfs_server *); -void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int); +void nfs_init_timeout_values(struct rpc_timeout *to, int proto, int timeo, int retrans); int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t, rpc_authflavor_t); struct nfs_server *nfs_alloc_server(void); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 18d446e..d396013 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -923,6 +923,8 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) data = kzalloc(sizeof(*data), GFP_KERNEL); if (data) { + data->timeo = NFS_UNSPEC_TIMEO; + data->retrans = NFS_UNSPEC_RETRANS; data->acregmin = NFS_DEF_ACREGMIN; data->acregmax = NFS_DEF_ACREGMAX; data->acdirmin = NFS_DEF_ACDIRMIN; @@ -1189,6 +1191,19 @@ static int nfs_get_option_ul(substring_t args[], unsigned long *option) return rc; } +static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option, + unsigned long l_bound, unsigned long u_bound) +{ + int ret; + + ret = nfs_get_option_ul(args, option); + if (ret != 0) + return ret; + if (*option < l_bound || *option > u_bound) + return -ERANGE; + return 0; +} + /* * Error-check and convert a string of mount options from user space into * a data structure. The whole mount string is processed; bad options are @@ -1352,12 +1367,12 @@ static int nfs_parse_mount_options(char *raw, mnt->bsize = option; break; case Opt_timeo: - if (nfs_get_option_ul(args, &option) || option == 0) + if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX)) goto out_invalid_value; mnt->timeo = option; break; case Opt_retrans: - if (nfs_get_option_ul(args, &option) || option == 0) + if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX)) goto out_invalid_value; mnt->retrans = option; break; -- cgit v0.10.2 From 15d03055cf39fe61714aeda8d0a722b3137531ed Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Aug 2016 11:08:22 -0400 Subject: pNFS/flexfiles: Set reasonable default retrans values for the data channel Prior to this patch, the retrans value was set at 5, meaning that we could see a maximum retransmission timeout value of more than 6 minutes. That's a tad high for NFSv3 where the protocol does allow the server to drop requests at any time. Since this is a data channel, let's just set retrans to 0, and the default timeout to 60s. The user can continue to adjust these defaults using the dataserver_retrans and dataserver_timeo module parameters. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 0aa36be..970efba 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -17,8 +17,8 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD -static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; -static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; +static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; +static unsigned int dataserver_retrans; void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) { -- cgit v0.10.2 From 7afafc8a44bf0ab841b17d450b02aedb3a138985 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 16 Aug 2016 10:59:35 +0300 Subject: block: Fix secure erase Commit 288dab8a35a0 ("block: add a separate operation type for secure erase") split REQ_OP_SECURE_ERASE from REQ_OP_DISCARD without considering all the places REQ_OP_DISCARD was being used to mean either. Fix those. Signed-off-by: Adrian Hunter Fixes: 288dab8a35a0 ("block: add a separate operation type for secure erase") Signed-off-by: Jens Axboe diff --git a/block/bio.c b/block/bio.c index f394775..aa73540 100644 --- a/block/bio.c +++ b/block/bio.c @@ -667,18 +667,19 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; - if (bio_op(bio) == REQ_OP_DISCARD) - goto integrity_clone; - - if (bio_op(bio) == REQ_OP_WRITE_SAME) { + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + case REQ_OP_SECURE_ERASE: + break; + case REQ_OP_WRITE_SAME: bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0]; - goto integrity_clone; + break; + default: + bio_for_each_segment(bv, bio_src, iter) + bio->bi_io_vec[bio->bi_vcnt++] = bv; + break; } - bio_for_each_segment(bv, bio_src, iter) - bio->bi_io_vec[bio->bi_vcnt++] = bv; - -integrity_clone: if (bio_integrity(bio_src)) { int ret; @@ -1788,7 +1789,7 @@ struct bio *bio_split(struct bio *bio, int sectors, * Discards need a mutable bio_vec to accommodate the payload * required by the DSM TRIM and UNMAP commands. */ - if (bio_op(bio) == REQ_OP_DISCARD) + if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE) split = bio_clone_bioset(bio, gfp, bs); else split = bio_clone_fast(bio, gfp, bs); diff --git a/block/blk-merge.c b/block/blk-merge.c index 3eec75a..72627e3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -172,12 +172,18 @@ void blk_queue_split(struct request_queue *q, struct bio **bio, struct bio *split, *res; unsigned nsegs; - if (bio_op(*bio) == REQ_OP_DISCARD) + switch (bio_op(*bio)) { + case REQ_OP_DISCARD: + case REQ_OP_SECURE_ERASE: split = blk_bio_discard_split(q, *bio, bs, &nsegs); - else if (bio_op(*bio) == REQ_OP_WRITE_SAME) + break; + case REQ_OP_WRITE_SAME: split = blk_bio_write_same_split(q, *bio, bs, &nsegs); - else + break; + default: split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs); + break; + } /* physical segments can be figured out during splitting */ res = split ? split : *bio; @@ -213,7 +219,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, * This should probably be returning 0, but blk_add_request_payload() * (Christoph!!!!) */ - if (bio_op(bio) == REQ_OP_DISCARD) + if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE) return 1; if (bio_op(bio) == REQ_OP_WRITE_SAME) @@ -385,7 +391,9 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, nsegs = 0; cluster = blk_queue_cluster(q); - if (bio_op(bio) == REQ_OP_DISCARD) { + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + case REQ_OP_SECURE_ERASE: /* * This is a hack - drivers should be neither modifying the * biovec, nor relying on bi_vcnt - but because of @@ -393,19 +401,16 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, * a payload we need to set up here (thank you Christoph) and * bi_vcnt is really the only way of telling if we need to. */ - - if (bio->bi_vcnt) - goto single_segment; - - return 0; - } - - if (bio_op(bio) == REQ_OP_WRITE_SAME) { -single_segment: + if (!bio->bi_vcnt) + return 0; + /* Fall through */ + case REQ_OP_WRITE_SAME: *sg = sglist; bvec = bio_iovec(bio); sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); return 1; + default: + break; } for_each_bio(bio) diff --git a/block/elevator.c b/block/elevator.c index 7096c22..f7d973a 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -366,7 +366,7 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); - if ((req_op(rq) == REQ_OP_DISCARD) != (req_op(pos) == REQ_OP_DISCARD)) + if (req_op(rq) != req_op(pos)) break; if (rq_data_dir(rq) != rq_data_dir(pos)) break; diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 48a5dd7..82503e6 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -1726,6 +1726,7 @@ static u8 mmc_blk_prep_packed_list(struct mmc_queue *mq, struct request *req) break; if (req_op(next) == REQ_OP_DISCARD || + req_op(next) == REQ_OP_SECURE_ERASE || req_op(next) == REQ_OP_FLUSH) break; diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index bf14642..29578e9 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -33,7 +33,8 @@ static int mmc_prep_request(struct request_queue *q, struct request *req) /* * We only like normal block requests and discards. */ - if (req->cmd_type != REQ_TYPE_FS && req_op(req) != REQ_OP_DISCARD) { + if (req->cmd_type != REQ_TYPE_FS && req_op(req) != REQ_OP_DISCARD && + req_op(req) != REQ_OP_SECURE_ERASE) { blk_dump_rq_flags(req, "MMC bad request"); return BLKPREP_KILL; } diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h index d625311..fee5e12 100644 --- a/drivers/mmc/card/queue.h +++ b/drivers/mmc/card/queue.h @@ -4,7 +4,9 @@ static inline bool mmc_req_is_special(struct request *req) { return req && - (req_op(req) == REQ_OP_FLUSH || req_op(req) == REQ_OP_DISCARD); + (req_op(req) == REQ_OP_FLUSH || + req_op(req) == REQ_OP_DISCARD || + req_op(req) == REQ_OP_SECURE_ERASE); } struct request; diff --git a/include/linux/bio.h b/include/linux/bio.h index 59ffaa6..23ddf4b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -71,7 +71,8 @@ static inline bool bio_has_data(struct bio *bio) { if (bio && bio->bi_iter.bi_size && - bio_op(bio) != REQ_OP_DISCARD) + bio_op(bio) != REQ_OP_DISCARD && + bio_op(bio) != REQ_OP_SECURE_ERASE) return true; return false; @@ -79,7 +80,9 @@ static inline bool bio_has_data(struct bio *bio) static inline bool bio_no_advance_iter(struct bio *bio) { - return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_WRITE_SAME; + return bio_op(bio) == REQ_OP_DISCARD || + bio_op(bio) == REQ_OP_SECURE_ERASE || + bio_op(bio) == REQ_OP_WRITE_SAME; } static inline bool bio_is_rw(struct bio *bio) @@ -199,6 +202,9 @@ static inline unsigned bio_segments(struct bio *bio) if (bio_op(bio) == REQ_OP_DISCARD) return 1; + if (bio_op(bio) == REQ_OP_SECURE_ERASE) + return 1; + if (bio_op(bio) == REQ_OP_WRITE_SAME) return 1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c210b6..e79055c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -882,7 +882,7 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, int op) { - if (unlikely(op == REQ_OP_DISCARD)) + if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)) return min(q->limits.max_discard_sectors, UINT_MAX >> 9); if (unlikely(op == REQ_OP_WRITE_SAME)) @@ -913,7 +913,9 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq, if (unlikely(rq->cmd_type != REQ_TYPE_FS)) return q->limits.max_hw_sectors; - if (!q->limits.chunk_sectors || (req_op(rq) == REQ_OP_DISCARD)) + if (!q->limits.chunk_sectors || + req_op(rq) == REQ_OP_DISCARD || + req_op(rq) == REQ_OP_SECURE_ERASE) return blk_queue_get_max_sectors(q, req_op(rq)); return min(blk_max_size_offset(q, offset), diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7598e6c..dbafc5d 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -223,7 +223,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= MASK_TC_BIT(op_flags, META); what |= MASK_TC_BIT(op_flags, PREFLUSH); what |= MASK_TC_BIT(op_flags, FUA); - if (op == REQ_OP_DISCARD) + if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE) what |= BLK_TC_ACT(BLK_TC_DISCARD); if (op == REQ_OP_FLUSH) what |= BLK_TC_ACT(BLK_TC_FLUSH); -- cgit v0.10.2 From 286054a7a8674b256da16564d96772b88896eb35 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 16 Aug 2016 17:51:06 +0100 Subject: KVM: arm64: ITS: avoid re-mapping LPIs When a guest wants to map a device-ID/event-ID combination that is already mapped, we may end up in a situation where an LPI is never "put", thus never being freed. Since the GICv3 spec says that mapping an already mapped LPI is UNPREDICTABLE, lets just bail out early in this situation to avoid any potential leaks. Signed-off-by: Andre Przywara Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 9533080..4660a7d 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -731,7 +731,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, u32 device_id = its_cmd_get_deviceid(its_cmd); u32 event_id = its_cmd_get_id(its_cmd); u32 coll_id = its_cmd_get_collection(its_cmd); - struct its_itte *itte, *new_itte = NULL; + struct its_itte *itte; struct its_device *device; struct its_collection *collection, *new_coll = NULL; int lpi_nr; @@ -749,6 +749,10 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser)) return E_ITS_MAPTI_PHYSICALID_OOR; + /* If there is an existing mapping, behavior is UNPREDICTABLE. */ + if (find_itte(its, device_id, event_id)) + return 0; + collection = find_collection(its, coll_id); if (!collection) { int ret = vgic_its_alloc_collection(its, &collection, coll_id); @@ -757,20 +761,16 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, new_coll = collection; } - itte = find_itte(its, device_id, event_id); + itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL); if (!itte) { - itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL); - if (!itte) { - if (new_coll) - vgic_its_free_collection(its, coll_id); - return -ENOMEM; - } - - new_itte = itte; - itte->event_id = event_id; - list_add_tail(&itte->itte_list, &device->itt_head); + if (new_coll) + vgic_its_free_collection(its, coll_id); + return -ENOMEM; } + itte->event_id = event_id; + list_add_tail(&itte->itte_list, &device->itt_head); + itte->collection = collection; itte->lpi = lpi_nr; @@ -778,8 +778,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, if (IS_ERR(irq)) { if (new_coll) vgic_its_free_collection(its, coll_id); - if (new_itte) - its_free_itte(kvm, new_itte); + its_free_itte(kvm, itte); return PTR_ERR(irq); } itte->irq = irq; -- cgit v0.10.2 From 0066c8b6f4050d7c57f6379d6fd4535e2f267f17 Mon Sep 17 00:00:00 2001 From: Kshitiz Gupta Date: Sat, 16 Jul 2016 02:23:45 -0500 Subject: igb: fix adjusting PTP timestamps for Tx/Rx latency Fix PHY delay compensation math in igb_ptp_tx_hwtstamp() and igb_ptp_rx_rgtstamp. Add PHY delay compensation in igb_ptp_rx_pktstamp(). In the IGB driver, there are two functions that retrieve timestamps received by the PHY - igb_ptp_rx_rgtstamp() and igb_ptp_rx_pktstamp(). The previous commit only changed igb_ptp_rx_rgtstamp(), and the change was incorrect. There are two instances in which PHY delay compensations should be made: - Before the packet transmission over the PHY, the latency between when the packet is timestamped and transmission of the packets, should be an add operation, but it is currently a subtract. - After the packets are received from the PHY, the latency between the receiving and timestamping of the packets should be a subtract operation, but it is currently an add. Signed-off-by: Kshitiz Gupta Fixes: 3f544d2 (igb: adjust ptp timestamps for tx/rx latency) Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index e61b647..336c103 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -744,7 +744,8 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) } } - shhwtstamps.hwtstamp = ktime_sub_ns(shhwtstamps.hwtstamp, adjust); + shhwtstamps.hwtstamp = + ktime_add_ns(shhwtstamps.hwtstamp, adjust); skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); dev_kfree_skb_any(adapter->ptp_tx_skb); @@ -767,13 +768,32 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, struct sk_buff *skb) { __le64 *regval = (__le64 *)va; + struct igb_adapter *adapter = q_vector->adapter; + int adjust = 0; /* The timestamp is recorded in little endian format. * DWORD: 0 1 2 3 * Field: Reserved Reserved SYSTIML SYSTIMH */ - igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb), + igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), le64_to_cpu(regval[1])); + + /* adjust timestamp for the RX latency based on link speed */ + if (adapter->hw.mac.type == e1000_i210) { + switch (adapter->link_speed) { + case SPEED_10: + adjust = IGB_I210_RX_LATENCY_10; + break; + case SPEED_100: + adjust = IGB_I210_RX_LATENCY_100; + break; + case SPEED_1000: + adjust = IGB_I210_RX_LATENCY_1000; + break; + } + } + skb_hwtstamps(skb)->hwtstamp = + ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); } /** @@ -825,7 +845,7 @@ void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, } } skb_hwtstamps(skb)->hwtstamp = - ktime_add_ns(skb_hwtstamps(skb)->hwtstamp, adjust); + ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); /* Update the last_rx_timestamp timer in order to enable watchdog check * for error case of latched timestamp on a dropped packet. -- cgit v0.10.2 From 0be5b96cd8400aeb4bf3f8c5e7f5efaa38ae5055 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Tue, 26 Jul 2016 14:25:34 -0400 Subject: e1000e: factor out systim sanitization This is prepatory work for an expanding list of adapter families that have occasional ~10 hour clock jumps when being used for PTP. Factor out the sanitization function and convert to using a feature (bug) flag, per suggestion from Jesse Brandeburg. Littering functional code with device-specific checks is much messier than simply checking a flag, and having device-specific init set flags as needed. There are probably a number of other cases in the e1000e code that could/should be converted similarly. Suggested-by: Jesse Brandeburg Signed-off-by: Jarod Wilson Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 7fd4d54..6b03c85 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -2032,7 +2032,8 @@ const struct e1000_info e1000_82574_info = { | FLAG2_DISABLE_ASPM_L0S | FLAG2_DISABLE_ASPM_L1 | FLAG2_NO_DISABLE_RX - | FLAG2_DMA_BURST, + | FLAG2_DMA_BURST + | FLAG2_CHECK_SYSTIM_OVERFLOW, .pba = 32, .max_hw_frame_size = DEFAULT_JUMBO, .get_variants = e1000_get_variants_82571, @@ -2053,7 +2054,8 @@ const struct e1000_info e1000_82583_info = { | FLAG_HAS_CTRLEXT_ON_LOAD, .flags2 = FLAG2_DISABLE_ASPM_L0S | FLAG2_DISABLE_ASPM_L1 - | FLAG2_NO_DISABLE_RX, + | FLAG2_NO_DISABLE_RX + | FLAG2_CHECK_SYSTIM_OVERFLOW, .pba = 32, .max_hw_frame_size = DEFAULT_JUMBO, .get_variants = e1000_get_variants_82571, diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index ef96cd1..879cca4 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -452,6 +452,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca); #define FLAG2_PCIM2PCI_ARBITER_WA BIT(11) #define FLAG2_DFLT_CRC_STRIPPING BIT(12) #define FLAG2_CHECK_RX_HWTSTAMP BIT(13) +#define FLAG2_CHECK_SYSTIM_OVERFLOW BIT(14) #define E1000_RX_DESC_PS(R, i) \ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 02f4439..7017281 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -4303,6 +4303,42 @@ void e1000e_reinit_locked(struct e1000_adapter *adapter) } /** + * e1000e_sanitize_systim - sanitize raw cycle counter reads + * @hw: pointer to the HW structure + * @systim: cycle_t value read, sanitized and returned + * + * Errata for 82574/82583 possible bad bits read from SYSTIMH/L: + * check to see that the time is incrementing at a reasonable + * rate and is a multiple of incvalue. + **/ +static cycle_t e1000e_sanitize_systim(struct e1000_hw *hw, cycle_t systim) +{ + u64 time_delta, rem, temp; + cycle_t systim_next; + u32 incvalue; + int i; + + incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK; + for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) { + /* latch SYSTIMH on read of SYSTIML */ + systim_next = (cycle_t)er32(SYSTIML); + systim_next |= (cycle_t)er32(SYSTIMH) << 32; + + time_delta = systim_next - systim; + temp = time_delta; + /* VMWare users have seen incvalue of zero, don't div / 0 */ + rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0); + + systim = systim_next; + + if ((time_delta < E1000_82574_SYSTIM_EPSILON) && (rem == 0)) + break; + } + + return systim; +} + +/** * e1000e_cyclecounter_read - read raw cycle counter (used by time counter) * @cc: cyclecounter structure **/ @@ -4312,7 +4348,7 @@ static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc) cc); struct e1000_hw *hw = &adapter->hw; u32 systimel, systimeh; - cycle_t systim, systim_next; + cycle_t systim; /* SYSTIMH latching upon SYSTIML read does not work well. * This means that if SYSTIML overflows after we read it but before * we read SYSTIMH, the value of SYSTIMH has been incremented and we @@ -4335,33 +4371,9 @@ static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc) systim = (cycle_t)systimel; systim |= (cycle_t)systimeh << 32; - if ((hw->mac.type == e1000_82574) || (hw->mac.type == e1000_82583)) { - u64 time_delta, rem, temp; - u32 incvalue; - int i; - - /* errata for 82574/82583 possible bad bits read from SYSTIMH/L - * check to see that the time is incrementing at a reasonable - * rate and is a multiple of incvalue - */ - incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK; - for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) { - /* latch SYSTIMH on read of SYSTIML */ - systim_next = (cycle_t)er32(SYSTIML); - systim_next |= (cycle_t)er32(SYSTIMH) << 32; - - time_delta = systim_next - systim; - temp = time_delta; - /* VMWare users have seen incvalue of zero, don't div / 0 */ - rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0); - - systim = systim_next; + if (adapter->flags2 & FLAG2_CHECK_SYSTIM_OVERFLOW) + systim = e1000e_sanitize_systim(hw, systim); - if ((time_delta < E1000_82574_SYSTIM_EPSILON) && - (rem == 0)) - break; - } - } return systim; } -- cgit v0.10.2 From 8037dd60f45264c3fbbea4cc0cea5f2f0a774b5e Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Tue, 26 Jul 2016 14:25:35 -0400 Subject: e1000e: fix PTP on e1000_pch_lpt variants I've got reports that the Intel I-218V NIC in Intel NUC5i5RYH systems used as a PTP slave experiences random ~10 hour clock jumps, which are resolved if the same workaround for the 82574 and 82583 is employed, so set the appropriate flag2 in e1000_pch_lpt_info too. Reported-by: Rupesh Patel Signed-off-by: Jarod Wilson Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 3e11322..f3aaca7 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -5885,7 +5885,8 @@ const struct e1000_info e1000_pch_lpt_info = { | FLAG_HAS_JUMBO_FRAMES | FLAG_APME_IN_WUC, .flags2 = FLAG2_HAS_PHY_STATS - | FLAG2_HAS_EEE, + | FLAG2_HAS_EEE + | FLAG2_CHECK_SYSTIM_OVERFLOW, .pba = 26, .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, -- cgit v0.10.2 From 4fe0d154880bb6eb833cbe84fa6f385f400f0b9c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Aug 2016 07:11:04 -0700 Subject: PCI: Use positive flags in pci_alloc_irq_vectors() Instead of passing negative flags like PCI_IRQ_NOMSI to prevent use of certain interrupt types, pass positive flags like PCI_IRQ_LEGACY, PCI_IRQ_MSI, etc., to specify the acceptable interrupt types. This is based on a number of pending driver conversions that just happend to be a whole more obvious to read this way, and given that we have no users in the tree yet it can still easily be done. I've also added a PCI_IRQ_ALL_TYPES catchall to keep the case of accepting all interrupt types very simple. [bhelgaas: changelog, fix PCI_IRQ_AFFINITY doc typo, remove mention of PCI_IRQ_NOLEGACY] Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas Reviewed-by: Alexander Gordeev diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index c55df29..cd9c9f6 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -94,14 +94,11 @@ has a requirements for a minimum number of vectors the driver can pass a min_vecs argument set to this limit, and the PCI core will return -ENOSPC if it can't meet the minimum number of vectors. -The flags argument should normally be set to 0, but can be used to pass the -PCI_IRQ_NOMSI and PCI_IRQ_NOMSIX flag in case a device claims to support -MSI or MSI-X, but the support is broken, or to pass PCI_IRQ_NOLEGACY in -case the device does not support legacy interrupt lines. - -By default this function will spread the interrupts around the available -CPUs, but this feature can be disabled by passing the PCI_IRQ_NOAFFINITY -flag. +The flags argument is used to specify which type of interrupt can be used +by the device and the driver (PCI_IRQ_LEGACY, PCI_IRQ_MSI, PCI_IRQ_MSIX). +A convenient short-hand (PCI_IRQ_ALL_TYPES) is also available to ask for +any possible kind of interrupt. If the PCI_IRQ_AFFINITY flag is set, +pci_alloc_irq_vectors() will spread the interrupts around the available CPUs. To get the Linux IRQ numbers passed to request_irq() and free_irq() and the vectors, use the following function: @@ -131,7 +128,7 @@ larger than the number supported by the device it will automatically be capped to the supported limit, so there is no need to query the number of vectors supported beforehand: - nvec = pci_alloc_irq_vectors(pdev, 1, nvec, 0); + nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_ALL_TYPES) if (nvec < 0) goto out_err; @@ -140,7 +137,7 @@ interrupts it can request a particular number of interrupts by passing that number to pci_alloc_irq_vectors() function as both 'min_vecs' and 'max_vecs' parameters: - ret = pci_alloc_irq_vectors(pdev, nvec, nvec, 0); + ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_ALL_TYPES); if (ret < 0) goto out_err; @@ -148,15 +145,14 @@ The most notorious example of the request type described above is enabling the single MSI mode for a device. It could be done by passing two 1s as 'min_vecs' and 'max_vecs': - ret = pci_alloc_irq_vectors(pdev, 1, 1, 0); + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); if (ret < 0) goto out_err; Some devices might not support using legacy line interrupts, in which case -the PCI_IRQ_NOLEGACY flag can be used to fail the request if the platform -can't provide MSI or MSI-X interrupts: +the driver can specify that only MSI or MSI-X is acceptable: - nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_NOLEGACY); + nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_MSI | PCI_IRQ_MSIX); if (nvec < 0) goto out_err; diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index a02981e..9233e7f 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1069,7 +1069,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, nvec = maxvec; for (;;) { - if (!(flags & PCI_IRQ_NOAFFINITY)) { + if (flags & PCI_IRQ_AFFINITY) { dev->irq_affinity = irq_create_affinity_mask(&nvec); if (nvec < minvec) return -ENOSPC; @@ -1105,7 +1105,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, **/ int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec) { - return __pci_enable_msi_range(dev, minvec, maxvec, PCI_IRQ_NOAFFINITY); + return __pci_enable_msi_range(dev, minvec, maxvec, 0); } EXPORT_SYMBOL(pci_enable_msi_range); @@ -1120,7 +1120,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, return -ERANGE; for (;;) { - if (!(flags & PCI_IRQ_NOAFFINITY)) { + if (flags & PCI_IRQ_AFFINITY) { dev->irq_affinity = irq_create_affinity_mask(&nvec); if (nvec < minvec) return -ENOSPC; @@ -1160,8 +1160,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec) { - return __pci_enable_msix_range(dev, entries, minvec, maxvec, - PCI_IRQ_NOAFFINITY); + return __pci_enable_msix_range(dev, entries, minvec, maxvec, 0); } EXPORT_SYMBOL(pci_enable_msix_range); @@ -1187,21 +1186,21 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, { int vecs = -ENOSPC; - if (!(flags & PCI_IRQ_NOMSIX)) { + if (flags & PCI_IRQ_MSIX) { vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs, flags); if (vecs > 0) return vecs; } - if (!(flags & PCI_IRQ_NOMSI)) { + if (flags & PCI_IRQ_MSI) { vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, flags); if (vecs > 0) return vecs; } /* use legacy irq if allowed */ - if (!(flags & PCI_IRQ_NOLEGACY) && min_vecs == 1) + if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1) return 1; return vecs; } diff --git a/include/linux/pci.h b/include/linux/pci.h index 2599a98..fbc1fa6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1251,10 +1251,12 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno); int pci_set_vga_state(struct pci_dev *pdev, bool decode, unsigned int command_bits, u32 flags); -#define PCI_IRQ_NOLEGACY (1 << 0) /* don't use legacy interrupts */ -#define PCI_IRQ_NOMSI (1 << 1) /* don't use MSI interrupts */ -#define PCI_IRQ_NOMSIX (1 << 2) /* don't use MSI-X interrupts */ -#define PCI_IRQ_NOAFFINITY (1 << 3) /* don't auto-assign affinity */ +#define PCI_IRQ_LEGACY (1 << 0) /* allow legacy interrupts */ +#define PCI_IRQ_MSI (1 << 1) /* allow MSI interrupts */ +#define PCI_IRQ_MSIX (1 << 2) /* allow MSI-X interrupts */ +#define PCI_IRQ_AFFINITY (1 << 3) /* auto-assign affinity */ +#define PCI_IRQ_ALL_TYPES \ + (PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX) /* kmem_cache style wrapper around pci_alloc_consistent() */ -- cgit v0.10.2 From a4423287ec16e74c25de8ee3f261b1ea18c0af67 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 9 Aug 2016 14:56:14 +0200 Subject: dm raid: fix frozen recovery regression On LVM2 conversions via lvconvert(8), the target keeps mapped devices in frozen state when requesting RAID devices be resynchronized. This applies to e.g. adding legs to a raid1 device or taking over from raid0 to raid4 when the rebuild flag's set on the new raid1 legs or the added dedicated parity stripe. Also, fix frozen recovery for reshaping as well. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 1b9795d..07d1728 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -191,7 +191,6 @@ struct raid_dev { #define RT_FLAG_RS_BITMAP_LOADED 2 #define RT_FLAG_UPDATE_SBS 3 #define RT_FLAG_RESHAPE_RS 4 -#define RT_FLAG_KEEP_RS_FROZEN 5 /* Array elements of 64 bit needed for rebuild/failed disk bits */ #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8) @@ -2579,7 +2578,6 @@ static int rs_prepare_reshape(struct raid_set *rs) } else { /* Process raid1 without delta_disks */ mddev->raid_disks = rs->raid_disks; - set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags); reshape = false; } } else { @@ -2590,7 +2588,6 @@ static int rs_prepare_reshape(struct raid_set *rs) if (reshape) { set_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags); set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); - set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags); } else if (mddev->raid_disks < rs->raid_disks) /* Create new superblocks and bitmaps, if any new disks */ set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); @@ -2902,7 +2899,6 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); - set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags); /* Takeover ain't recovery, so disable recovery */ rs_setup_recovery(rs, MaxSector); rs_set_new(rs); @@ -3624,8 +3620,7 @@ static void raid_resume(struct dm_target *ti) * retrieved from the superblock by the ctr because * the ongoing recovery/reshape will change it after read. */ - if (!test_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags)) - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (mddev->suspended) mddev_resume(mddev); -- cgit v0.10.2 From f60439bc21e3337429838e477903214f5bd8277f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 11 Aug 2016 14:51:56 -0700 Subject: ixgbe: Force VLNCTRL.VFE to be set in all VMDq paths When I was adding the code for enabling VLAN promiscuous mode with SR-IOV enabled I had inadvertently left the VLNCTRL.VFE bit unchanged as I has assumed there was code in another path that was setting it when we enabled SR-IOV. This wasn't the case and as a result we were just disabling VLAN filtering for all the VFs apparently. Also the previous patches were always clearing CFIEN which was always set to 0 by the hardware anyway so I am dropping the redundant bit clearing. Fixes: 16369564915a ("ixgbe: Add support for VLAN promiscuous with SR-IOV") Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5418c69a..e0fdef8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4100,6 +4100,8 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; u32 vlnctrl, i; + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + switch (hw->mac.type) { case ixgbe_mac_82599EB: case ixgbe_mac_X540: @@ -4112,8 +4114,7 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) /* fall through */ case ixgbe_mac_82598EB: /* legacy case, we can just disable VLAN filtering */ - vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); + vlnctrl &= ~IXGBE_VLNCTRL_VFE; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); return; } @@ -4125,6 +4126,10 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) /* Set flag so we don't redo unnecessary work */ adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC; + /* For VMDq and SR-IOV we must leave VLAN filtering enabled */ + vlnctrl |= IXGBE_VLNCTRL_VFE; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + /* Add PF to all active pools */ for (i = IXGBE_VLVF_ENTRIES; --i;) { u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32); @@ -4191,6 +4196,11 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; u32 vlnctrl, i; + /* Set VLAN filtering to enabled */ + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + vlnctrl |= IXGBE_VLNCTRL_VFE; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + switch (hw->mac.type) { case ixgbe_mac_82599EB: case ixgbe_mac_X540: @@ -4202,10 +4212,6 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter) break; /* fall through */ case ixgbe_mac_82598EB: - vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - vlnctrl &= ~IXGBE_VLNCTRL_CFIEN; - vlnctrl |= IXGBE_VLNCTRL_VFE; - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); return; } -- cgit v0.10.2 From 31e10a41203dbc95e0c1e81ef49ad1773a50d4f9 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 10 Aug 2016 02:45:59 +0200 Subject: dm raid: fix restoring of failed devices regression 'lvchange --refresh RaidLV' causes a mapped device suspend/resume cycle aiming at device restore and resync after transient device failures. This failed because flag RT_FLAG_RS_RESUMED was always cleared in the suspend path, thus the device restore wasn't performed in the resume path. Solve by removing RT_FLAG_RS_RESUMED from the suspend path and resume unconditionally. Also, remove superfluous comment from raid_resume(). Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 07d1728..81ec772 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3382,11 +3382,10 @@ static void raid_postsuspend(struct dm_target *ti) { struct raid_set *rs = ti->private; - if (test_and_clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) { - if (!rs->md.suspended) - mddev_suspend(&rs->md); - rs->md.ro = 1; - } + if (!rs->md.suspended) + mddev_suspend(&rs->md); + + rs->md.ro = 1; } static void attempt_restore_of_faulty_devices(struct raid_set *rs) @@ -3606,25 +3605,15 @@ static void raid_resume(struct dm_target *ti) * devices are reachable again. */ attempt_restore_of_faulty_devices(rs); - } else { - mddev->ro = 0; - mddev->in_sync = 0; + } - /* - * When passing in flags to the ctr, we expect userspace - * to reset them because they made it to the superblocks - * and reload the mapping anyway. - * - * -> only unfreeze recovery in case of a table reload or - * we'll have a bogus recovery/reshape position - * retrieved from the superblock by the ctr because - * the ongoing recovery/reshape will change it after read. - */ - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + mddev->ro = 0; + mddev->in_sync = 0; - if (mddev->suspended) - mddev_resume(mddev); - } + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + + if (mddev->suspended) + mddev_resume(mddev); } static struct target_type raid_target = { -- cgit v0.10.2 From 3d951822be216d8c6fcfc8abf75e5ed307eeb646 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Aug 2016 09:53:39 -0700 Subject: ixgbe: Re-enable ability to toggle VLAN filtering Back when I submitted the GSO code I messed up and dropped the support for disabling the VLAN tag filtering via the feature bit. This patch re-enables the use of the NETIF_F_HW_VLAN_CTAG_FILTER to enable/disable the VLAN filtering independent of toggling promiscuous mode. Fixes: b83e30104b ("ixgbe/ixgbevf: Add support for GSO partial") Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e0fdef8..ee57a89 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9523,6 +9523,7 @@ skip_sriov: /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features | + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_RXALL | -- cgit v0.10.2 From a3c06a389751192fdcbcdd8bba57bdb856eafe68 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 9 Aug 2016 14:55:35 +0200 Subject: dm raid: enhance attempt_restore_of_faulty_devices() to support more devices attempt_restore_of_faulty_devices() is limited to 64 when it should support the new maximum of 253 when identifying any failed devices. It clears any revivable devices via an MD personality hot remove and add cylce to allow for their recovery. Address by using existing functions to retrieve and update all failed devices' bitfield members in the dm raid superblocks on all RAID devices and check for any devices to clear in it. Whilst on it, don't call attempt_restore_of_faulty_devices() for any MD personality not providing disk hot add/remove methods (i.e. raid0 now), because such personalities don't support reviving of failed disks. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 81ec772..b1c251872 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3391,11 +3391,19 @@ static void raid_postsuspend(struct dm_target *ti) static void attempt_restore_of_faulty_devices(struct raid_set *rs) { int i; - uint64_t failed_devices, cleared_failed_devices = 0; + uint64_t cleared_failed_devices[DISKS_ARRAY_ELEMS]; unsigned long flags; + bool cleared = false; struct dm_raid_superblock *sb; + struct mddev *mddev = &rs->md; struct md_rdev *r; + /* RAID personalities have to provide hot add/remove methods or we need to bail out. */ + if (!mddev->pers || !mddev->pers->hot_add_disk || !mddev->pers->hot_remove_disk) + return; + + memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices)); + for (i = 0; i < rs->md.raid_disks; i++) { r = &rs->dev[i].rdev; if (test_bit(Faulty, &r->flags) && r->sb_page && @@ -3415,7 +3423,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) * ourselves. */ if ((r->raid_disk >= 0) && - (r->mddev->pers->hot_remove_disk(r->mddev, r) != 0)) + (mddev->pers->hot_remove_disk(mddev, r) != 0)) /* Failed to revive this device, try next */ continue; @@ -3425,22 +3433,30 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) clear_bit(Faulty, &r->flags); clear_bit(WriteErrorSeen, &r->flags); clear_bit(In_sync, &r->flags); - if (r->mddev->pers->hot_add_disk(r->mddev, r)) { + if (mddev->pers->hot_add_disk(mddev, r)) { r->raid_disk = -1; r->saved_raid_disk = -1; r->flags = flags; } else { r->recovery_offset = 0; - cleared_failed_devices |= 1 << i; + set_bit(i, (void *) cleared_failed_devices); + cleared = true; } } } - if (cleared_failed_devices) { + + /* If any failed devices could be cleared, update all sbs failed_devices bits */ + if (cleared) { + uint64_t failed_devices[DISKS_ARRAY_ELEMS]; + rdev_for_each(r, &rs->md) { sb = page_address(r->sb_page); - failed_devices = le64_to_cpu(sb->failed_devices); - failed_devices &= ~cleared_failed_devices; - sb->failed_devices = cpu_to_le64(failed_devices); + sb_retrieve_failed_devices(sb, failed_devices); + + for (i = 0; i < DISKS_ARRAY_ELEMS; i++) + failed_devices[i] &= ~cleared_failed_devices[i]; + + sb_update_failed_devices(sb, failed_devices); } } } -- cgit v0.10.2 From fbfe12c64f9650aa22f434dd9dd22df7ddf63221 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Fri, 12 Aug 2016 09:56:32 -0700 Subject: i40e: check for and deal with non-contiguous TCs The i40e driver was causing a kernel panic when non-contiguous Traffic Classes, or Traffic Classes not starting with TC0, were configured on a link partner switch. i40e does not support non-contiguous TCs. To fix this, the patch changes the logic when determining the total number of TCs enabled. Before, this would use the highest TC number enabled and assume that all TCs below it were also enabled. Now, we create a bitmask of enabled TCs and scan it to determine not only the number of TCs, but also if the set of enabled TCs starts at zero and is contiguous. If not, then DCB is disabled by only returning one TC. Signed-off-by: Dave Ertman Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 81c99e1..c6ac7a6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4554,23 +4554,38 @@ static u8 i40e_get_iscsi_tc_map(struct i40e_pf *pf) **/ static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg) { + int i, tc_unused = 0; u8 num_tc = 0; - int i; + u8 ret = 0; /* Scan the ETS Config Priority Table to find * traffic class enabled for a given priority - * and use the traffic class index to get the - * number of traffic classes enabled + * and create a bitmask of enabled TCs */ - for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { - if (dcbcfg->etscfg.prioritytable[i] > num_tc) - num_tc = dcbcfg->etscfg.prioritytable[i]; - } + for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) + num_tc |= BIT(dcbcfg->etscfg.prioritytable[i]); - /* Traffic class index starts from zero so - * increment to return the actual count + /* Now scan the bitmask to check for + * contiguous TCs starting with TC0 */ - return num_tc + 1; + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + if (num_tc & BIT(i)) { + if (!tc_unused) { + ret++; + } else { + pr_err("Non-contiguous TC - Disabling DCB\n"); + return 1; + } + } else { + tc_unused = 1; + } + } + + /* There is always at least TC0 */ + if (!ret) + ret = 1; + + return ret; } /** -- cgit v0.10.2 From 4dd3fd7197303739094183b139bae3142a3d55e6 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 17 Aug 2016 08:30:28 +1000 Subject: xfs: don't assert fail on non-async buffers on ioacct decrement The buffer I/O accounting mechanism tracks async buffers under I/O. As an optimization, the buffer I/O count is incremented only once on the first async I/O for a given hold cycle of a buffer and decremented once the buffer is released to the LRU (or freed). xfs_buf_ioacct_dec() has an ASSERT() check for an XBF_ASYNC buffer, but we have one or two corner cases where a buffer can be submitted for I/O multiple times via different methods in a single hold cycle. If an async I/O occurs first, the I/O count is incremented. If a sync I/O occurs before the hold count drops, XBF_ASYNC is cleared by the time the I/O count is decremented. Remove the async assert check from xfs_buf_ioacct_dec() as this is a perfectly valid scenario. For the purposes of I/O accounting, we really only care about the buffer async state at I/O submission time. Discovered-and-analyzed-by: Dave Chinner Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 47a318c..607cc29 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -115,7 +115,6 @@ xfs_buf_ioacct_dec( if (!(bp->b_flags & _XBF_IN_FLIGHT)) return; - ASSERT(bp->b_flags & XBF_ASYNC); bp->b_flags &= ~_XBF_IN_FLIGHT; percpu_counter_dec(&bp->b_target->bt_io_count); } -- cgit v0.10.2 From 0af32fb468b4a4434dd759d68611763658650b59 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Aug 2016 08:30:28 +1000 Subject: xfs: fix bogus space reservation in xfs_iomap_write_allocate The space reservations was without an explaination in commit "Add error reporting calls in error paths that return EFSCORRUPTED" back in 2003. There is no reason to reserve disk blocks in the transaction when allocating blocks for delalloc space as we already reserved the space when creating the delalloc extent. With this fix we stop running out of the reserved pool in generic/229, which has happened for long time with small blocksize file systems, and has increased in severity with the new buffered write path. [ dchinner: we still need to pass the block reservation into xfs_bmapi_write() to ensure we don't deadlock during AG selection. See commit dbd5c8c ("xfs: pass total block res. as total xfs_bmapi_write() parameter") for more details on why this is necessary. ] Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 2114d53..4398932 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -715,12 +715,16 @@ xfs_iomap_write_allocate( * is in the delayed allocation extent on which we sit * but before our buffer starts. */ - nimaps = 0; while (nimaps == 0) { nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); - - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, nres, + /* + * We have already reserved space for the extent and any + * indirect blocks when creating the delalloc extent, + * there is no need to reserve space in this transaction + * again. + */ + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, XFS_TRANS_RESERVE, &tp); if (error) return error; -- cgit v0.10.2 From 8b2180b3bf0338625cab07da6543acb436df9c40 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 17 Aug 2016 08:31:33 +1000 Subject: xfs: don't invalidate whole file on DAX read/write When we do DAX IO, we try to invalidate the entire page cache held on the file. This is incorrect as it will trash the entire mapping tree that now tracks dirty state in exceptional entries in the radix tree slots. What we are trying to do is remove cached pages (e.g from reads into holes) that sit in the radix tree over the range we are about to write to. Hence we should just limit the invalidation to the range we are about to overwrite. Reported-by: Jan Kara Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index ed95e5b..e612a02 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -741,9 +741,20 @@ xfs_file_dax_write( * page is inserted into the pagecache when we have to serve a write * fault on a hole. It should never be dirtied and can simply be * dropped from the pagecache once we get real data for the page. + * + * XXX: This is racy against mmap, and there's nothing we can do about + * it. dax_do_io() should really do this invalidation internally as + * it will know if we've allocated over a holei for this specific IO and + * if so it needs to update the mapping tree and invalidate existing + * PTEs over the newly allocated range. Remove this invalidation when + * dax_do_io() is fixed up. */ if (mapping->nrpages) { - ret = invalidate_inode_pages2(mapping); + loff_t end = iocb->ki_pos + iov_iter_count(from) - 1; + + ret = invalidate_inode_pages2_range(mapping, + iocb->ki_pos >> PAGE_SHIFT, + end >> PAGE_SHIFT); WARN_ON_ONCE(ret); } -- cgit v0.10.2 From f32866fdc9ed077c3bc7160f4f53d6a479201d46 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 17 Aug 2016 08:31:49 +1000 Subject: xfs: store rmapbt block count in the AGF Track the number of blocks used for the rmapbt in the AGF. When we get to the AG reservation code we need this counter to quickly make our reservation during mount. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 776ae2f..af1a72e 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2264,6 +2264,7 @@ xfs_alloc_log_agf( offsetof(xfs_agf_t, agf_longest), offsetof(xfs_agf_t, agf_btreeblks), offsetof(xfs_agf_t, agf_uuid), + offsetof(xfs_agf_t, agf_rmap_blocks), sizeof(xfs_agf_t) }; diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index f814d42..e6a8bea 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -640,12 +640,15 @@ typedef struct xfs_agf { __be32 agf_btreeblks; /* # of blocks held in AGF btrees */ uuid_t agf_uuid; /* uuid of filesystem */ + __be32 agf_rmap_blocks; /* rmapbt blocks used */ + __be32 agf_padding; /* padding */ + /* * reserve some contiguous space for future logged fields before we add * the unlogged fields. This makes the range logging via flags and * structure offsets much simpler. */ - __be64 agf_spare64[16]; + __be64 agf_spare64[15]; /* unlogged fields, written during buffer writeback. */ __be64 agf_lsn; /* last write sequence */ @@ -670,7 +673,8 @@ typedef struct xfs_agf { #define XFS_AGF_LONGEST 0x00000400 #define XFS_AGF_BTREEBLKS 0x00000800 #define XFS_AGF_UUID 0x00001000 -#define XFS_AGF_NUM_BITS 13 +#define XFS_AGF_RMAP_BLOCKS 0x00002000 +#define XFS_AGF_NUM_BITS 14 #define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) #define XFS_AGF_FLAGS \ @@ -686,7 +690,8 @@ typedef struct xfs_agf { { XFS_AGF_FREEBLKS, "FREEBLKS" }, \ { XFS_AGF_LONGEST, "LONGEST" }, \ { XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \ - { XFS_AGF_UUID, "UUID" } + { XFS_AGF_UUID, "UUID" }, \ + { XFS_AGF_RMAP_BLOCKS, "RMAP_BLOCKS" } /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index bc1faeb..17b8eeb 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -98,6 +98,8 @@ xfs_rmapbt_alloc_block( union xfs_btree_ptr *new, int *stat) { + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); int error; xfs_agblock_t bno; @@ -124,6 +126,8 @@ xfs_rmapbt_alloc_block( xfs_trans_agbtree_delta(cur->bc_tp, 1); new->s = cpu_to_be32(bno); + be32_add_cpu(&agf->agf_rmap_blocks, 1); + xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; @@ -143,6 +147,8 @@ xfs_rmapbt_free_block( bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp)); trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno, bno, 1); + be32_add_cpu(&agf->agf_rmap_blocks, -1); + xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1); if (error) return error; diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 0f96847..0b7f986 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -248,6 +248,7 @@ xfs_growfs_data_private( agf->agf_roots[XFS_BTNUM_RMAPi] = cpu_to_be32(XFS_RMAP_BLOCK(mp)); agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1); + agf->agf_rmap_blocks = cpu_to_be32(1); } agf->agf_flfirst = cpu_to_be32(1); -- cgit v0.10.2 From 97dd8c9ee60c98f09be86a2e7e61b18f8d8ed4e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Aug 2016 08:39:47 +1000 Subject: iomap: remove superflous mark_page_accessed from iomap_write_actor This catches up with commit 2457ae ("mm: non-atomically mark page accessed during page cache allocation where possible"), which moved the initial access marking into the pagecache allocator. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/iomap.c b/fs/iomap.c index 48141b8..f39c318 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -199,7 +199,6 @@ again: pagefault_enable(); flush_dcache_page(page); - mark_page_accessed(page); status = iomap_write_end(inode, pos, bytes, copied, page); if (unlikely(status < 0)) -- cgit v0.10.2 From 274c887494cb248eb05f8180bda8298942d98625 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Aug 2016 08:40:18 +1000 Subject: iomap: remove superflous pagefault_disable from iomap_write_actor iov_iter_copy_from_user_atomic disables page faults internally, no need to do it around the call. This also brings the iomap code in line with the original filemap version. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/iomap.c b/fs/iomap.c index f39c318..74712e2 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -194,9 +194,7 @@ again: if (mapping_writably_mapped(inode->i_mapping)) flush_dcache_page(page); - pagefault_disable(); copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); - pagefault_enable(); flush_dcache_page(page); -- cgit v0.10.2 From 8896b8f60951ff0a8b7092f495a445714a1bddc4 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 17 Aug 2016 08:41:10 +1000 Subject: iomap: fiemap should honor the FIEMAP_FLAG_SYNC flag The flag is checked as supported, but then we do an unconditional sync of the file, regardless of whether the flag is set or not. Make the sync conditional on having the FIEMAP_FLAG_SYNC flag set. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner diff --git a/fs/iomap.c b/fs/iomap.c index 74712e2..56c19e6 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -467,9 +467,11 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi, if (ret) return ret; - ret = filemap_write_and_wait(inode->i_mapping); - if (ret) - return ret; + if (fi->fi_flags & FIEMAP_FLAG_SYNC) { + ret = filemap_write_and_wait(inode->i_mapping); + if (ret) + return ret; + } while (len > 0) { ret = iomap_apply(inode, start, len, 0, ops, &ctx, -- cgit v0.10.2 From ac2dc058bce81b83631ad5accb90b8f4abe613b7 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 17 Aug 2016 08:41:34 +1000 Subject: iomap: prepare iomap_fiemap for attribute mappings By bassing through an -ENOENT, similar to the old XFS implementation of FIEMAP_FLAG_XATTR. Signed-off-by: Dave Chinner [hch: split from a larger patch] Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/iomap.c b/fs/iomap.c index 56c19e6..d9d1f50 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -476,6 +476,9 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi, while (len > 0) { ret = iomap_apply(inode, start, len, 0, ops, &ctx, iomap_fiemap_actor); + /* inode with no (attribute) mapping will give ENOENT */ + if (ret == -ENOENT) + break; if (ret < 0) return ret; if (ret == 0) -- cgit v0.10.2 From f20ac7ab17fcf7046bd9bc4166faf9580f713afd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Aug 2016 08:42:34 +1000 Subject: iomap: mark ->iomap_end as optional No need to implement it for read-only mappings. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/iomap.c b/fs/iomap.c index d9d1f50..0342254 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -84,8 +84,11 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags, * Now the data has been copied, commit the range we've copied. This * should not fail unless the filesystem has had a fatal error. */ - ret = ops->iomap_end(inode, pos, length, written > 0 ? written : 0, - flags, &iomap); + if (ops->iomap_end) { + ret = ops->iomap_end(inode, pos, length, + written > 0 ? written : 0, + flags, &iomap); + } return written ? written : ret; } -- cgit v0.10.2 From b95a21271b30544a9fb992269d79ed1e1978e023 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Aug 2016 08:44:52 +1000 Subject: xfs: simplify xfs_file_iomap_begin We'll never get nimap == 0 for a successful return from xfs_bmapi_read, so don't try to handle it. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 2114d53..1cce760 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1037,20 +1037,14 @@ xfs_file_iomap_begin( return error; trace_xfs_iomap_alloc(ip, offset, length, 0, &imap); - xfs_bmbt_to_iomap(ip, iomap, &imap); - } else if (nimaps) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - trace_xfs_iomap_found(ip, offset, length, 0, &imap); - xfs_bmbt_to_iomap(ip, iomap, &imap); } else { + ASSERT(nimaps); + xfs_iunlock(ip, XFS_ILOCK_EXCL); - trace_xfs_iomap_not_found(ip, offset, length, 0, &imap); - iomap->blkno = IOMAP_NULL_BLOCK; - iomap->type = IOMAP_HOLE; - iomap->offset = offset; - iomap->length = length; + trace_xfs_iomap_found(ip, offset, length, 0, &imap); } + xfs_bmbt_to_iomap(ip, iomap, &imap); return 0; } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 551b7e2..7e88bec 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1298,7 +1298,6 @@ DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct); DEFINE_IOMAP_EVENT(xfs_iomap_alloc); DEFINE_IOMAP_EVENT(xfs_iomap_found); -DEFINE_IOMAP_EVENT(xfs_iomap_not_found); DECLARE_EVENT_CLASS(xfs_simple_io_class, TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), -- cgit v0.10.2 From 1d4795e7bde075588c90df2175349bb2251802d5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Aug 2016 08:45:30 +1000 Subject: xfs: (re-)implement FIEMAP_FLAG_XATTR Use a special read-only iomap_ops implementation to support fiemap on the attr fork. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 1cce760..697c8fd 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1106,3 +1106,48 @@ struct iomap_ops xfs_iomap_ops = { .iomap_begin = xfs_file_iomap_begin, .iomap_end = xfs_file_iomap_end, }; + +static int +xfs_xattr_iomap_begin( + struct inode *inode, + loff_t offset, + loff_t length, + unsigned flags, + struct iomap *iomap) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length); + struct xfs_bmbt_irec imap; + int nimaps = 1, error = 0; + unsigned lockmode; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + lockmode = xfs_ilock_data_map_shared(ip); + + /* if there are no attribute fork or extents, return ENOENT */ + if (XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) { + error = -ENOENT; + goto out_unlock; + } + + ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL); + error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, + &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK); +out_unlock: + xfs_iunlock(ip, lockmode); + + if (!error) { + ASSERT(nimaps); + xfs_bmbt_to_iomap(ip, iomap, &imap); + } + + return error; +} + +struct iomap_ops xfs_xattr_iomap_ops = { + .iomap_begin = xfs_xattr_iomap_begin, +}; diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index e066d04..fb8aca3 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -35,5 +35,6 @@ void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, struct xfs_bmbt_irec *); extern struct iomap_ops xfs_iomap_ops; +extern struct iomap_ops xfs_xattr_iomap_ops; #endif /* __XFS_IOMAP_H__*/ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ab820f8..b24c310 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1009,7 +1009,14 @@ xfs_vn_fiemap( int error; xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED); - error = iomap_fiemap(inode, fieinfo, start, length, &xfs_iomap_ops); + if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { + fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR; + error = iomap_fiemap(inode, fieinfo, start, length, + &xfs_xattr_iomap_ops); + } else { + error = iomap_fiemap(inode, fieinfo, start, length, + &xfs_iomap_ops); + } xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED); return error; -- cgit v0.10.2 From a03f1a6633144300ef4a3a33e95dfa11866f1299 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 17 Aug 2016 11:12:57 +1000 Subject: xfs: remove OWN_AG rmap when allocating a block from the AGFL When we're really tight on space, xfs_alloc_ag_vextent_small() can allocate a block from the AGFL and give it to the caller. Since the caller is never the AGFL-fixing method, we must remove the OWN_AG reverse mapping because it will clash with whatever rmap the caller wants to set up. This bug was discovered by running generic/299 repeatedly. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index af1a72e..3dd8f1d 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -1582,6 +1582,7 @@ xfs_alloc_ag_vextent_small( xfs_extlen_t *flenp, /* result length */ int *stat) /* status: 0-freelist, 1-normal/none */ { + struct xfs_owner_info oinfo; int error; xfs_agblock_t fbno; xfs_extlen_t flen; @@ -1624,6 +1625,18 @@ xfs_alloc_ag_vextent_small( error0); args->wasfromfl = 1; trace_xfs_alloc_small_freelist(args); + + /* + * If we're feeding an AGFL block to something that + * doesn't live in the free space, we need to clear + * out the OWN_AG rmap. + */ + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); + error = xfs_rmap_free(args->tp, args->agbp, args->agno, + fbno, 1, &oinfo); + if (error) + goto error0; + *stat = 0; return 0; } -- cgit v0.10.2 From b347af816ad2086c1dacf9f74973b82f83e877be Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 11 Aug 2016 17:14:45 -0700 Subject: md: do not count journal as spare in GET_ARRAY_INFO GET_ARRAY_INFO counts journal as spare (spare_disks), which is not accurate. This patch fixes this. Reported-by: Yi Zhang Signed-off-by: Song Liu Signed-off-by: Shaohua Li diff --git a/drivers/md/md.c b/drivers/md/md.c index 2c3ab6f..d750b52 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5851,6 +5851,9 @@ static int get_array_info(struct mddev *mddev, void __user *arg) working++; if (test_bit(In_sync, &rdev->flags)) insync++; + else if (test_bit(Journal, &rdev->flags)) + /* TODO: add journal count to md_u.h */ + ; else spare++; } -- cgit v0.10.2 From 1b856086813be9371929b6cc62045f9fd470f5a0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 16 Aug 2016 16:48:36 -0700 Subject: block: Fix race triggered by blk_set_queue_dying() blk_set_queue_dying() can be called while another thread is submitting I/O or changing queue flags, e.g. through dm_stop_queue(). Hence protect the QUEUE_FLAG_DYING flag change with locking. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Mike Snitzer Cc: stable Signed-off-by: Jens Axboe diff --git a/block/blk-core.c b/block/blk-core.c index 999442e..36c7ac3 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -515,7 +515,9 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end); void blk_set_queue_dying(struct request_queue *q) { - queue_flag_set_unlocked(QUEUE_FLAG_DYING, q); + spin_lock_irq(q->queue_lock); + queue_flag_set(QUEUE_FLAG_DYING, q); + spin_unlock_irq(q->queue_lock); if (q->mq_ops) blk_mq_wake_waiters(q); -- cgit v0.10.2 From 9ac715954682b23d293d910ad2697554171035e7 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 17 Aug 2016 10:46:10 +0200 Subject: KVM: arm/arm64: Change misleading use of is_error_pfn When converting a gfn to a pfn, we call gfn_to_pfn_prot, which returns various kinds of error values. It turns out that is_error_pfn() only returns true when the gfn was found in a memory slot and could somehow not be used, but it does not return true if the gfn does not belong to any memory slot. Change use to is_error_noslot_pfn() which covers both cases. Note: Since we already check for kvm_is_error_hva(hva) explicitly in the caller of this function while holding the kvm->srcu lock protecting the memory slots, this should never be a problem, but nevertheless this change is warranted as it shows the intention of the code. Reported-by: James Hogan Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index bda27b6..29d0b23 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1309,7 +1309,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, smp_rmb(); pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); - if (is_error_pfn(pfn)) + if (is_error_noslot_pfn(pfn)) return -EFAULT; if (kvm_is_device_pfn(pfn)) { -- cgit v0.10.2 From 674e70127069f3fd3c58fb0f94c60eb0f6567d78 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 16 Aug 2016 15:03:01 +0100 Subject: arm64: Document workaround for Cortex-A72 erratum #853709 We already have a workaround for Cortex-A57 erratum #852523, but Cortex-A72 r0p0 to r0p2 do suffer from the same issue (known as erratum #853709). Let's document the fact that we already handle this. Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 4da60b4..ccc6032 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -53,6 +53,7 @@ stable kernels. | ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 | | ARM | Cortex-A57 | #852523 | N/A | | ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 | +| ARM | Cortex-A72 | #853709 | N/A | | ARM | MMU-500 | #841119,#826419 | N/A | | | | | | | Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index ae7855f..5a84b45 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -256,7 +256,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) /* * We must restore the 32-bit state before the sysregs, thanks - * to Cortex-A57 erratum #852523. + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). */ __sysreg32_restore_state(vcpu); __sysreg_restore_guest_state(guest_ctxt); -- cgit v0.10.2 From cabdc5c59ab46a1ec5ea98c5ac4022111fbfd63a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 16 Aug 2016 15:03:02 +0100 Subject: KVM: arm/arm64: timer: Workaround misconfigured timer interrupt Similarily to f005bd7e3b84 ("clocksource/arm_arch_timer: Force per-CPU interrupt to be level-triggered"), make sure we can survive an interrupt that has been misconfigured as edge-triggered by forcing it to be level-triggered (active low is assumed, but the GIC doesn't really care whether this is high or low). Hopefully, the amount of shouting in the kernel log will convince the user to do something about their firmware. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 4fde8c7..77e6ccf 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -33,6 +33,7 @@ static struct timecounter *timecounter; static struct workqueue_struct *wqueue; static unsigned int host_vtimer_irq; +static u32 host_vtimer_irq_flags; void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) { @@ -365,7 +366,7 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) static void kvm_timer_init_interrupt(void *info) { - enable_percpu_irq(host_vtimer_irq, 0); + enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); } int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) @@ -432,6 +433,14 @@ int kvm_timer_hyp_init(void) } host_vtimer_irq = info->virtual_irq; + host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); + if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && + host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { + kvm_err("Invalid trigger for IRQ%d, assuming level low\n", + host_vtimer_irq); + host_vtimer_irq_flags = IRQF_TRIGGER_LOW; + } + err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, "kvm guest timer", kvm_get_running_vcpus()); if (err) { -- cgit v0.10.2 From 1e12c4a9393b75a744aada2c8115434572698bc3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 11 Aug 2016 14:19:42 +0100 Subject: genirq: Correctly configure the trigger on chained interrupts Commit 1e2a7d78499e ("irqdomain: Don't set type when mapping an IRQ") moved the trigger configuration call from the irqdomain mapping to the interrupt being actually requested. This patch failed to handle the case where we configure a chained interrupt, which doesn't get requested through the usual path. In order to solve this, let's call __irq_set_trigger just before starting the cascade interrupt. Special care must be taken to make the flow handler stick, as the .irq_set_type method could have reset it (it doesn't know we're dealing with a chained interrupt). Based on an initial patch by Jon Hunter. Fixes: 1e2a7d78499e ("irqdomain: Don't set type when mapping an IRQ") Reported-by: John Stultz Reported-by: Linus Walleij Tested-by: John Stultz Acked-by: Jon Hunter Signed-off-by: Marc Zyngier diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b4c1bc7..6373890 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -820,6 +820,17 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, desc->name = name; if (handle != handle_bad_irq && is_chained) { + /* + * We're about to start this interrupt immediately, + * hence the need to set the trigger configuration. + * But the .set_type callback may have overridden the + * flow handler, ignoring that we're dealing with a + * chained interrupt. Reset it immediately because we + * do know better. + */ + __irq_set_trigger(desc, irqd_get_trigger_type(&desc->irq_data)); + desc->handle_irq = handle; + irq_settings_set_noprobe(desc); irq_settings_set_norequest(desc); irq_settings_set_nothread(desc); -- cgit v0.10.2 From b63bebe2355cf2632a2979fd2982c88d080c44b6 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Wed, 10 Aug 2016 10:49:42 +0100 Subject: arm64: KVM: remove misleading comment on pmu status Comment about how PMU access is handled is not relavant since v4.6 where proper PMU support was added in. Signed-off-by: Vladimir Murzin Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b0b225c..af5ea86 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -823,14 +823,6 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, * Architected system registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 * - * We could trap ID_DFR0 and tell the guest we don't support performance - * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was - * NAKed, so it will read the PMCR anyway. - * - * Therefore we tell the guest we have 0 counters. Unfortunately, we - * must always support PMCCNTR (the cycle counter): we just RAZ/WI for - * all PM registers, which doesn't crash the guest kernel at least. - * * Debug handling: We do trap most, if not all debug related system * registers. The implementation is good enough to ensure that a guest * can use these with minimal performance degradation. The drawback is -- cgit v0.10.2 From f7f6f2d94f0027242ddfd665289b107a873fde43 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Wed, 10 Aug 2016 10:49:43 +0100 Subject: arm64: KVM: report configured SRE value to 32-bit world After commit b34f2bc ("arm64: KVM: Make ICC_SRE_EL1 access return the configured SRE value") we report SRE value to 64-bit guest, but 32-bit one still handled as RAZ/WI what leads to funny promise we do not keep: "GICv3: GIC: unable to set SRE (disabled at EL2), panic ahead" Instead, return the actual value of the ICC_SRE_EL1 register that the guest should see. [ Tweaked commit message - Christoffer ] Signed-off-by: Vladimir Murzin Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index af5ea86..e51367d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1352,7 +1352,7 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, /* ICC_SRE */ - { Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi }, + { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre }, { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, -- cgit v0.10.2 From 0e62fd836e4c2908cc1e32c68806529b4f859955 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Wed, 17 Aug 2016 12:21:33 +0200 Subject: clocksource/drivers/time-armada-370-xp: Fix the clock reference While converting the init function to return an error, the wrong clock was get. This leads to the wrong clock rate and slows down the kernel. For example, it affects typical boot time: - without fix: over 1 minute - with fix: 15 seconds Tested-by: Stefan Roese Tested-by: Ralph Sennhauser Signed-off-by: Gregory CLEMENT Signed-off-by: Daniel Lezcano Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 12549e27c63c ("clocksource/drivers/time-armada-370-xp: Convert init function to return error") Link: http://lkml.kernel.org/r/1471429296-9053-1-git-send-email-daniel.lezcano@linaro.org [ Refined the changelog. ] Signed-off-by: Ingo Molnar diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index 719b478..3c39e6f 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -338,7 +338,6 @@ static int __init armada_xp_timer_init(struct device_node *np) struct clk *clk = of_clk_get_by_name(np, "fixed"); int ret; - clk = of_clk_get(np, 0); if (IS_ERR(clk)) { pr_err("Failed to get clock"); return PTR_ERR(clk); -- cgit v0.10.2 From 16c8eba0fe01e03317f48868105103a8f5938e85 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 Aug 2016 12:21:34 +0200 Subject: clocksource/drivers/kona: Fix get_counter() error handling I could not figure out why, but GCC cannot prove that the kona_timer_init() function always initializes its two outputs, and we get a warning for the use of the 'lsw' variable later, which is obviously correct. drivers/clocksource/bcm_kona_timer.c: In function 'kona_timer_init': drivers/clocksource/bcm_kona_timer.c:119:13: error: 'lsw' may be used uninitialized in this function [-Werror=maybe-uninitialized] Slightly reordering the loop makes the warning disappear, after it becomes more obvious to the compiler that the loop is always entered on the first iteration. As pointed out by Ray Jui, there is a related problem in the way we deal with the loop running into the limit, as we just keep going there with an invalid counter data, so instead we now propagate a -ETIMEDOUT result to the caller. Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Lezcano Acked-by: Ray Jui Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bcm-kernel-feedback-list@broadcom.com Link: http://lkml.kernel.org/r/1471429296-9053-2-git-send-email-daniel.lezcano@linaro.org Link: https://patchwork.kernel.org/patch/9174261/ Signed-off-by: Ingo Molnar diff --git a/drivers/clocksource/bcm_kona_timer.c b/drivers/clocksource/bcm_kona_timer.c index 7e3fd37..92f6e4d 100644 --- a/drivers/clocksource/bcm_kona_timer.c +++ b/drivers/clocksource/bcm_kona_timer.c @@ -66,10 +66,10 @@ static void kona_timer_disable_and_clear(void __iomem *base) } -static void +static int kona_timer_get_counter(void __iomem *timer_base, uint32_t *msw, uint32_t *lsw) { - int loop_limit = 4; + int loop_limit = 3; /* * Read 64-bit free running counter @@ -83,18 +83,19 @@ kona_timer_get_counter(void __iomem *timer_base, uint32_t *msw, uint32_t *lsw) * if new hi-word is equal to previously read hi-word then stop. */ - while (--loop_limit) { + do { *msw = readl(timer_base + KONA_GPTIMER_STCHI_OFFSET); *lsw = readl(timer_base + KONA_GPTIMER_STCLO_OFFSET); if (*msw == readl(timer_base + KONA_GPTIMER_STCHI_OFFSET)) break; - } + } while (--loop_limit); if (!loop_limit) { pr_err("bcm_kona_timer: getting counter failed.\n"); pr_err(" Timer will be impacted\n"); + return -ETIMEDOUT; } - return; + return 0; } static int kona_timer_set_next_event(unsigned long clc, @@ -112,8 +113,11 @@ static int kona_timer_set_next_event(unsigned long clc, uint32_t lsw, msw; uint32_t reg; + int ret; - kona_timer_get_counter(timers.tmr_regs, &msw, &lsw); + ret = kona_timer_get_counter(timers.tmr_regs, &msw, &lsw); + if (ret) + return ret; /* Load the "next" event tick value */ writel(lsw + clc, timers.tmr_regs + KONA_GPTIMER_STCM0_OFFSET); -- cgit v0.10.2 From be5769e2061ac40b32daa83e28e1c4aac7133511 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 17 Aug 2016 12:21:35 +0200 Subject: clocksource/drivers/mips-gic-timer: Make gic_clocksource_of_init() return int In commit: d8152bf85d2c0 ("clocksource/drivers/mips-gic-timer: Convert init function to return error") several return values were added to a void function resulting in the following warnings: clocksource/mips-gic-timer.c: In function 'gic_clocksource_of_init': clocksource/mips-gic-timer.c:175:3: warning: 'return' with a value, in function returning void [enabled by default] clocksource/mips-gic-timer.c:183:4: warning: 'return' with a value, in function returning void [enabled by default] clocksource/mips-gic-timer.c:190:3: warning: 'return' with a value, in function returning void [enabled by default] clocksource/mips-gic-timer.c:195:3: warning: 'return' with a value, in function returning void [enabled by default] clocksource/mips-gic-timer.c:200:3: warning: 'return' with a value, in function returning void [enabled by default] clocksource/mips-gic-timer.c:211:2: warning: 'return' with a value, in function returning void [enabled by default] clocksource/mips-gic-timer.c: At top level: clocksource/mips-gic-timer.c:213:1: warning: comparison of distinct pointer types lacks a cast [enabled by default] clocksource/mips-gic-timer.c: In function 'gic_clocksource_of_init': clocksource/mips-gic-timer.c:183:18: warning: ignoring return value of 'PTR_ERR', declared with attribute warn_unused_result [-Wunused-result] Given that the addition of the return values was intentional, it seems that the conversion of the containing function from void to int was simply overlooked. Signed-off-by: Paul Gortmaker Signed-off-by: Daniel Lezcano Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Fixes: d8152bf85d2c ("clocksource/drivers/mips-gic-timer: Convert init function to return error") Link: http://lkml.kernel.org/r/1471429296-9053-3-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c index d91e872..b4b3ab5 100644 --- a/drivers/clocksource/mips-gic-timer.c +++ b/drivers/clocksource/mips-gic-timer.c @@ -164,7 +164,7 @@ void __init gic_clocksource_init(unsigned int frequency) gic_start_count(); } -static void __init gic_clocksource_of_init(struct device_node *node) +static int __init gic_clocksource_of_init(struct device_node *node) { struct clk *clk; int ret; -- cgit v0.10.2 From 568ac888215c7fb2fabe8ea739b00ec3c1f5d440 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 10 Aug 2016 15:43:06 -0400 Subject: cgroup: reduce read locked section of cgroup_threadgroup_rwsem during fork cgroup_threadgroup_rwsem is acquired in read mode during process exit and fork. It is also grabbed in write mode during __cgroups_proc_write(). I've recently run into a scenario with lots of memory pressure and OOM and I am beginning to see systemd __switch_to+0x1f8/0x350 __schedule+0x30c/0x990 schedule+0x48/0xc0 percpu_down_write+0x114/0x170 __cgroup_procs_write.isra.12+0xb8/0x3c0 cgroup_file_write+0x74/0x1a0 kernfs_fop_write+0x188/0x200 __vfs_write+0x6c/0xe0 vfs_write+0xc0/0x230 SyS_write+0x6c/0x110 system_call+0x38/0xb4 This thread is waiting on the reader of cgroup_threadgroup_rwsem to exit. The reader itself is under memory pressure and has gone into reclaim after fork. There are times the reader also ends up waiting on oom_lock as well. __switch_to+0x1f8/0x350 __schedule+0x30c/0x990 schedule+0x48/0xc0 jbd2_log_wait_commit+0xd4/0x180 ext4_evict_inode+0x88/0x5c0 evict+0xf8/0x2a0 dispose_list+0x50/0x80 prune_icache_sb+0x6c/0x90 super_cache_scan+0x190/0x210 shrink_slab.part.15+0x22c/0x4c0 shrink_zone+0x288/0x3c0 do_try_to_free_pages+0x1dc/0x590 try_to_free_pages+0xdc/0x260 __alloc_pages_nodemask+0x72c/0xc90 alloc_pages_current+0xb4/0x1a0 page_table_alloc+0xc0/0x170 __pte_alloc+0x58/0x1f0 copy_page_range+0x4ec/0x950 copy_process.isra.5+0x15a0/0x1870 _do_fork+0xa8/0x4b0 ppc_clone+0x8/0xc In the meanwhile, all processes exiting/forking are blocked almost stalling the system. This patch moves the threadgroup_change_begin from before cgroup_fork() to just before cgroup_canfork(). There is no nee to worry about threadgroup changes till the task is actually added to the threadgroup. This avoids having to call reclaim with cgroup_threadgroup_rwsem held. tj: Subject and description edits. Signed-off-by: Balbir Singh Acked-by: Zefan Li Cc: Oleg Nesterov Cc: Andrew Morton Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Tejun Heo diff --git a/kernel/fork.c b/kernel/fork.c index 52e725d4..aaf7823 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1404,7 +1404,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->real_start_time = ktime_get_boot_ns(); p->io_context = NULL; p->audit_context = NULL; - threadgroup_change_begin(current); cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); @@ -1556,6 +1555,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; + threadgroup_change_begin(current); /* * Ensure that the cgroup subsystem policies allow the new process to be * forked. It should be noted the the new process's css_set can be changed @@ -1656,6 +1656,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, bad_fork_cancel_cgroup: cgroup_cancel_fork(p); bad_fork_free_pid: + threadgroup_change_end(current); if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_thread: @@ -1688,7 +1689,6 @@ bad_fork_cleanup_policy: mpol_put(p->mempolicy); bad_fork_cleanup_threadgroup_lock: #endif - threadgroup_change_end(current); delayacct_tsk_free(p); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); -- cgit v0.10.2 From 9e7d9367e6a29e4cac85aa6df199f760dfa39c8a Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 17 Aug 2016 15:36:44 +0200 Subject: dm raid: support raid0 with missing metadata devices The raid0 MD personality does not start a raid0 array with any of its data devices missing. dm-raid was removing data/metadata device pairs unconditionally if it failed to read a superblock off the respective metadata device of such pair, resulting in failure to start arrays with the raid0 personality. Avoid removing any data/metadata device pairs in case of raid0 (e.g. lvm2 segment type 'raid0_meta') thus allowing MD to start the array. Also, avoid region size validation for raid0. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index b1c251872..8abde6b 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -860,6 +860,9 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) { unsigned long min_region_size = rs->ti->len / (1 << 21); + if (rs_is_raid0(rs)) + return 0; + if (!region_size) { /* * Choose a reasonable default. All figures in sectors. @@ -929,6 +932,8 @@ static int validate_raid_redundancy(struct raid_set *rs) rebuild_cnt++; switch (rs->raid_type->level) { + case 0: + break; case 1: if (rebuild_cnt >= rs->md.raid_disks) goto too_many; @@ -2334,6 +2339,13 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) case 0: break; default: + /* + * We have to keep any raid0 data/metadata device pairs or + * the MD raid0 personality will fail to start the array. + */ + if (rs_is_raid0(rs)) + continue; + dev = container_of(rdev, struct raid_dev, rdev); if (dev->meta_dev) dm_put_device(ti, dev->meta_dev); -- cgit v0.10.2 From e77e6ff502ea3d193872b5b9033bfd9717b36447 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 15 Aug 2016 21:50:35 +0800 Subject: netfilter: conntrack: do not dump other netns's conntrack entries via proc We should skip the conntracks that belong to a different namespace, otherwise other unrelated netns's conntrack entries will be dumped via /proc/net/nf_conntrack. Fixes: 56d52d4892d0 ("netfilter: conntrack: use a single hashtable for all namespaces") Signed-off-by: Liping Zhang Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 958a145..9f267c3 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -205,6 +205,7 @@ static int ct_seq_show(struct seq_file *s, void *v) struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); const struct nf_conntrack_l3proto *l3proto; const struct nf_conntrack_l4proto *l4proto; + struct net *net = seq_file_net(s); int ret = 0; NF_CT_ASSERT(ct); @@ -215,6 +216,9 @@ static int ct_seq_show(struct seq_file *s, void *v) if (NF_CT_DIRECTION(hash)) goto release; + if (!net_eq(nf_ct_net(ct), net)) + goto release; + l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); NF_CT_ASSERT(l3proto); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); -- cgit v0.10.2 From 2497b84625466dc57b8c3a40cd41a659fe04cca6 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 13 Aug 2016 22:46:04 +0800 Subject: netfilter: nfnetlink_log: add "nf-logger-3-1" module alias name Otherwise, if nfnetlink_log.ko is not loaded, we cannot add rules to log packets to the userspace when we specify it with arp family, such as: # nft add rule arp filter input log group 0 :1:1-37: Error: Could not process rule: No such file or directory add rule arp filter input log group 0 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index cbcfdfb..6577db5 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1147,6 +1147,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG); MODULE_ALIAS_NF_LOGGER(AF_INET, 1); MODULE_ALIAS_NF_LOGGER(AF_INET6, 1); MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1); +MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */ module_init(nfnetlink_log_init); module_exit(nfnetlink_log_fini); -- cgit v0.10.2 From bfe6c8a89e03f52bccf922f74ced8fe959e7fd36 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 15 Aug 2016 16:33:10 +0100 Subject: arm64: Fix NUMA build error when !CONFIG_ACPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since asm/acpi.h is only included by linux/acpi.h when CONFIG_ACPI is enabled, disabling the latter leads to the following build error on arm64: arch/arm64/mm/numa.c: In function ‘arm64_numa_init’: arch/arm64/mm/numa.c:395:24: error: ‘arm64_acpi_numa_init’ undeclared (first use in this function) if (!acpi_disabled && !numa_init(arm64_acpi_numa_init)) This patch include the asm/acpi.h explicitly in arch/arm64/mm/numa.c for the arm64_acpi_numa_init() definition. Fixes: d8b47fca8c23 ("arm64, ACPI, NUMA: NUMA support based on SRAT and SLIT") Reviewed-by: Hanjun Guo Signed-off-by: Catalin Marinas diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index c7fe3ec..5bb15ea 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -23,6 +23,8 @@ #include #include +#include + struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); nodemask_t numa_nodes_parsed __initdata; -- cgit v0.10.2 From bc9f3d7788a88d080a30599bde68f383daf8f8a5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 17 Aug 2016 17:54:41 +0200 Subject: arm64: kernel: avoid literal load of virtual address with MMU off Literal loads of virtual addresses are subject to runtime relocation when CONFIG_RELOCATABLE=y, and given that the relocation routines run with the MMU and caches enabled, literal loads of relocated values performed with the MMU off are not guaranteed to return the latest value unless the memory covering the literal is cleaned to the PoC explicitly. So defer the literal load until after the MMU has been enabled, just like we do for primary_switch() and secondary_switch() in head.S. Fixes: 1e48ef7fcc37 ("arm64: add support for building vmlinux as a relocatable PIE binary") Cc: # 4.6+ Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 9a3aec9..ccf79d8 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -101,12 +101,20 @@ ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly /* enable the MMU early - so we can access sleep_save_stash by va */ adr_l lr, __enable_mmu /* __cpu_setup will return here */ - ldr x27, =_cpu_resume /* __enable_mmu will branch here */ + adr_l x27, _resume_switched /* __enable_mmu will branch here */ adrp x25, idmap_pg_dir adrp x26, swapper_pg_dir b __cpu_setup ENDPROC(cpu_resume) + .pushsection ".idmap.text", "ax" +_resume_switched: + ldr x8, =_cpu_resume + br x8 +ENDPROC(_resume_switched) + .ltorg + .popsection + ENTRY(_cpu_resume) mrs x1, mpidr_el1 adrp x8, mpidr_hash -- cgit v0.10.2 From 207efcd2b55e0460dfee35663fbb3d05efad990a Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 12 Aug 2016 13:42:40 +0800 Subject: md: remove obsolete ret in md_start_sync The ret is not needed anymore since we have already move resync_start into md_do_sync in commit 41a9a0d. Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li diff --git a/drivers/md/md.c b/drivers/md/md.c index d750b52..19d8e23 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8278,16 +8278,13 @@ no_add: static void md_start_sync(struct work_struct *ws) { struct mddev *mddev = container_of(ws, struct mddev, del_work); - int ret = 0; mddev->sync_thread = md_register_thread(md_do_sync, mddev, "resync"); if (!mddev->sync_thread) { - if (!(mddev_is_clustered(mddev) && ret == -EAGAIN)) - printk(KERN_ERR "%s: could not start resync" - " thread...\n", - mdname(mddev)); + printk(KERN_ERR "%s: could not start resync thread...\n", + mdname(mddev)); /* leave the spares where they are, it shouldn't hurt */ clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); -- cgit v0.10.2 From c622ca543bff8e73efacf4dafa0cc9851ecea511 Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Tue, 16 Aug 2016 14:26:08 +0200 Subject: md: don't print the same repeated messages about delayed sync operation This fixes a long-standing bug that caused a flood of messages like: "md: delaying data-check of md1 until md2 has finished (they share one or more physical units)" It can be reproduced like this: 1. Create at least 3 raid1 arrays on a pair of disks, each on different partitions. 2. Request a sync operation like 'check' or 'repair' on 2 arrays by writing to their md/sync_action attribute files. One operation should start and one should be delayed and a message like the above will be printed. 3. Issue a write to the third array. Each write will cause 2 copies of the message to be printed. This happens when wake_up(&resync_wait) is called, usually by md_check_recovery(). Then the delayed sync thread again prints the message and is put to sleep. This patch adds a check in md_do_sync() to prevent printing this message more than once for the same pair of devices. Reported-by: Sven Koehler Link: https://bugzilla.kernel.org/show_bug.cgi?id=151801 Signed-off-by: Artur Paszkiewicz Signed-off-by: Shaohua Li diff --git a/drivers/md/md.c b/drivers/md/md.c index 19d8e23..cc25cbc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7865,6 +7865,7 @@ void md_do_sync(struct md_thread *thread) */ do { + int mddev2_minor = -1; mddev->curr_resync = 2; try_again: @@ -7894,10 +7895,14 @@ void md_do_sync(struct md_thread *thread) prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE); if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && mddev2->curr_resync >= mddev->curr_resync) { - printk(KERN_INFO "md: delaying %s of %s" - " until %s has finished (they" - " share one or more physical units)\n", - desc, mdname(mddev), mdname(mddev2)); + if (mddev2_minor != mddev2->md_minor) { + mddev2_minor = mddev2->md_minor; + printk(KERN_INFO "md: delaying %s of %s" + " until %s has finished (they" + " share one or more physical units)\n", + desc, mdname(mddev), + mdname(mddev2)); + } mddev_put(mddev2); if (signal_pending(current)) flush_signals(current); -- cgit v0.10.2 From 059e232089e45b0befc9933d31209c225e08b426 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 9 Aug 2016 07:50:44 +0100 Subject: irqchip/gic: Allow self-SGIs for SMP on UP configurations On systems where a single CPU is present, the GIC may not support having SGIs delivered to a target list. In that case, we use the self-SGI mechanism to allow the interrupt to be delivered locally. Tested-by: Fabio Estevam Signed-off-by: Marc Zyngier diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index c2cab57..390fac5 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -769,6 +769,13 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) int cpu; unsigned long flags, map = 0; + if (unlikely(nr_cpu_ids == 1)) { + /* Only one CPU? let's do a self-IPI... */ + writel_relaxed(2 << 24 | irq, + gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT); + return; + } + raw_spin_lock_irqsave(&irq_controller_lock, flags); /* Convert our logical CPU mask into a physical one. */ -- cgit v0.10.2 From ccd9432a5c85f35df7b491a1b701560d247466a5 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 17 Aug 2016 13:49:19 +0100 Subject: irqchip/gicv3: Remove disabling redistributor and group1 non-secure interrupts As per the GICv3 specification, to power down a processor using GICv3 and allow automatic power-on if an interrupt must be sent to a processor, software must set Enable to zero for all interrupt groups(by writing to GICC_CTLR or ICC_IGRPEN{0,1}_EL1/3 as appropriate. When commit 3708d52fc6bb ("irqchip: gic-v3: Implement CPU PM notifier") was introduced there were no firmware implementations(in particular PSCI) handling this. Linux kernel may not be aware of the CPU power state details and might fail to identify the power states that require quiescing the CPU interface. Even if it can be aware of those details, it can't determine which CPU power state have been triggered at the platform level and how the power control is implemented. This patch make disabling redistributor and group1 non-secure interrupts in the power down path and re-enabling of redistributor in the power-up path conditional. It will be handled in the kernel if and only if the non-secure accesses are permitted to access and modify control registers. It is left to the platform implementation otherwise. Cc: Marc Zyngier Cc: Lorenzo Pieralisi Cc: Thomas Gleixner Cc: Jason Cooper Tested-by: Christopher Covington Signed-off-by: Sudeep Holla Signed-off-by: Marc Zyngier diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 6fc56c3..ede5672 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -667,13 +667,20 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, #endif #ifdef CONFIG_CPU_PM +/* Check whether it's single security state view */ +static bool gic_dist_security_disabled(void) +{ + return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS; +} + static int gic_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd, void *v) { if (cmd == CPU_PM_EXIT) { - gic_enable_redist(true); + if (gic_dist_security_disabled()) + gic_enable_redist(true); gic_cpu_sys_reg_init(); - } else if (cmd == CPU_PM_ENTER) { + } else if (cmd == CPU_PM_ENTER && gic_dist_security_disabled()) { gic_write_grpen1(0); gic_enable_redist(false); } -- cgit v0.10.2 From aca300183ed4f723837f6619facff0890c46d313 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 13 Aug 2016 23:13:02 +0800 Subject: netfilter: nfnetlink_acct: report overquota to the right netns We should report the over quota message to the right net namespace instead of the init netns. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h index 80ca889..664da00 100644 --- a/include/linux/netfilter/nfnetlink_acct.h +++ b/include/linux/netfilter/nfnetlink_acct.h @@ -15,6 +15,6 @@ struct nf_acct; struct nf_acct *nfnl_acct_find_get(struct net *net, const char *filter_name); void nfnl_acct_put(struct nf_acct *acct); void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct); -extern int nfnl_acct_overquota(const struct sk_buff *skb, - struct nf_acct *nfacct); +int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb, + struct nf_acct *nfacct); #endif /* _NFNL_ACCT_H */ diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 1b4de4b..796605b 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -443,7 +443,7 @@ void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct) } EXPORT_SYMBOL_GPL(nfnl_acct_update); -static void nfnl_overquota_report(struct nf_acct *nfacct) +static void nfnl_overquota_report(struct net *net, struct nf_acct *nfacct) { int ret; struct sk_buff *skb; @@ -458,11 +458,12 @@ static void nfnl_overquota_report(struct nf_acct *nfacct) kfree_skb(skb); return; } - netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA, + netlink_broadcast(net->nfnl, skb, 0, NFNLGRP_ACCT_QUOTA, GFP_ATOMIC); } -int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct) +int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb, + struct nf_acct *nfacct) { u64 now; u64 *quota; @@ -480,7 +481,7 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct) if (now >= *quota && !test_and_set_bit(NFACCT_OVERQUOTA_BIT, &nfacct->flags)) { - nfnl_overquota_report(nfacct); + nfnl_overquota_report(net, nfacct); } return ret; diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index 3048a7e..cf32759 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -26,7 +26,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) nfnl_acct_update(skb, info->nfacct); - overquota = nfnl_acct_overquota(skb, info->nfacct); + overquota = nfnl_acct_overquota(par->net, skb, info->nfacct); return overquota == NFACCT_UNDERQUOTA ? false : true; } -- cgit v0.10.2 From dcbe35909c8426e1ace74b4b99c4cb403cdaca89 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Aug 2016 09:56:46 -0700 Subject: netfilter: tproxy: properly refcount tcp listeners inet_lookup_listener() and inet6_lookup_listener() no longer take a reference on the found listener. This minimal patch adds back the refcounting, but we might do this differently in net-next later. Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") Reported-and-tested-by: Denys Fedoryshchenko Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 7f4414d..663c4c3 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -127,6 +127,8 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, daddr, dport, in->ifindex); + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; /* NOTE: we return listeners even if bound to * 0.0.0.0, those are filtered out in * xt_socket, since xt_TPROXY needs 0 bound @@ -195,6 +197,8 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, daddr, ntohs(dport), in->ifindex); + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; /* NOTE: we return listeners even if bound to * 0.0.0.0, those are filtered out in * xt_socket, since xt_TPROXY needs 0 bound -- cgit v0.10.2 From 7a35583ec5b64f17559c9de8d7c47f7360e40362 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Aug 2016 16:39:28 +0200 Subject: mlxsw: spectrum: Don't return upon error in removal path When removing a VLAN filter from the device we shouldn't return upon the first error we encounter, as otherwise we'll have resources that will never be freed nor used. Instead, we should keep trying to free as much resources as possible in a best effort mode. Remove the error message as well, since we already get these from the EMAD transaction code. Fixes: 99724c18fc66 ("mlxsw: spectrum: Introduce support for router interfaces") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index e1b8f62..76b53ed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1010,7 +1010,6 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp_port *mlxsw_sp_vport; struct mlxsw_sp_fid *f; - int err; /* VLAN 0 is removed from HW filter when device goes down, but * it is reserved in our case, so simply return. @@ -1019,23 +1018,12 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev, return 0; mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - if (!mlxsw_sp_vport) { - netdev_warn(dev, "VID=%d does not exist\n", vid); + if (WARN_ON(!mlxsw_sp_vport)) return 0; - } - err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); - if (err) { - netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", - vid); - return err; - } + mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); - if (err) { - netdev_err(dev, "Failed to enable learning for VID=%d\n", vid); - return err; - } + mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); /* Drop FID reference. If this was the last reference the * resources will be freed. @@ -1048,13 +1036,8 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev, * transition all active 802.1Q bridge VLANs to use VID to FID * mappings and set port's mode to VLAN mode. */ - if (list_is_singular(&mlxsw_sp_port->vports_list)) { - err = mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); - if (err) { - netdev_err(dev, "Failed to set to VLAN mode\n"); - return err; - } - } + if (list_is_singular(&mlxsw_sp_port->vports_list)) + mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); mlxsw_sp_port_vport_destroy(mlxsw_sp_vport); -- cgit v0.10.2 From fa66d7e3fea7504e241e9004998af2c71814da18 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Aug 2016 16:39:29 +0200 Subject: mlxsw: spectrum: Remove redundant errors from the code Currently, when device configuration fails we emit errors to the kernel log despite the fact we already get these from the EMAD transaction layer, so remove them. In addition to being unnecessary, removing these error messages will allow us to reuse mlxsw_sp_port_add_vid() to create the PVID vPort before registering the netdevice. Fixes: 99724c18fc66 ("mlxsw: spectrum: Introduce support for router interfaces") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 76b53ed..a9281afc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -956,16 +956,12 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, if (!vid) return 0; - if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) { - netdev_warn(dev, "VID=%d already configured\n", vid); + if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) return 0; - } mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vid); - if (!mlxsw_sp_vport) { - netdev_err(dev, "Failed to create vPort for VID=%d\n", vid); + if (!mlxsw_sp_vport) return -ENOMEM; - } /* When adding the first VLAN interface on a bridged port we need to * transition all the active 802.1Q bridge VLANs to use explicit @@ -973,24 +969,17 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, */ if (list_is_singular(&mlxsw_sp_port->vports_list)) { err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port); - if (err) { - netdev_err(dev, "Failed to set to Virtual mode\n"); + if (err) goto err_port_vp_mode_trans; - } } err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); - if (err) { - netdev_err(dev, "Failed to disable learning for VID=%d\n", vid); + if (err) goto err_port_vid_learning_set; - } err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged); - if (err) { - netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", - vid); + if (err) goto err_port_add_vid; - } return 0; -- cgit v0.10.2 From 05978481e77e47b0bcb1767d3783fa0e5a18f399 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Aug 2016 16:39:30 +0200 Subject: mlxsw: spectrum: Create PVID vPort before registering netdevice After registering a netdevice it's possible for user space applications to configure an IP address on it. From the driver's perspective, this means a router interface (RIF) should be created for the PVID vPort. Therefore, we must create the PVID vPort before registering the netdevice. Fixes: 99724c18fc66 ("mlxsw: spectrum: Introduce support for router interfaces") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index a9281afc..0677f3f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -942,8 +942,8 @@ static void mlxsw_sp_port_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_vport) kfree(mlxsw_sp_vport); } -int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, - u16 vid) +static int mlxsw_sp_port_add_vid(struct net_device *dev, + __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp_port *mlxsw_sp_vport; @@ -2048,6 +2048,18 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } +static int mlxsw_sp_port_pvid_vport_create(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port->pvid = 1; + + return mlxsw_sp_port_add_vid(mlxsw_sp_port->dev, 0, 1); +} + +static int mlxsw_sp_port_pvid_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1); +} + static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, bool split, u8 module, u8 width, u8 lane) { @@ -2163,6 +2175,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_dcb_init; } + err = mlxsw_sp_port_pvid_vport_create(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create PVID vPort\n", + mlxsw_sp_port->local_port); + goto err_port_pvid_vport_create; + } + mlxsw_sp_port_switchdev_init(mlxsw_sp_port); err = register_netdev(dev); if (err) { @@ -2180,18 +2199,14 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_core_port_init; } - err = mlxsw_sp_port_vlan_init(mlxsw_sp_port); - if (err) - goto err_port_vlan_init; - mlxsw_sp->ports[local_port] = mlxsw_sp_port; return 0; -err_port_vlan_init: - mlxsw_core_port_fini(&mlxsw_sp_port->core_port); err_core_port_init: unregister_netdev(dev); err_register_netdev: + mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); +err_port_pvid_vport_create: mlxsw_sp_port_dcb_fini(mlxsw_sp_port); err_port_dcb_init: err_port_ets_init: @@ -2221,8 +2236,8 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp->ports[local_port] = NULL; mlxsw_core_port_fini(&mlxsw_sp_port->core_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ + mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); mlxsw_sp_port_dcb_fini(mlxsw_sp_port); - mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1); mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index f69aa37..ab3feb8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -536,8 +536,6 @@ int mlxsw_sp_port_vid_to_fid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool is_member, bool untagged); -int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, - u16 vid); int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, bool set); void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index a1ad5e6..b5e864d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -997,13 +997,13 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, } static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid_begin, u16 vid_end, bool init) + u16 vid_begin, u16 vid_end) { struct net_device *dev = mlxsw_sp_port->dev; u16 vid, pvid; int err; - if (!init && !mlxsw_sp_port->bridged) + if (!mlxsw_sp_port->bridged) return -EINVAL; err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, @@ -1014,9 +1014,6 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, return err; } - if (init) - goto out; - pvid = mlxsw_sp_port->pvid; if (pvid >= vid_begin && pvid <= vid_end) { err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); @@ -1028,7 +1025,6 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); -out: /* Changing activity bits only if HW operation succeded */ for (vid = vid_begin; vid <= vid_end; vid++) clear_bit(vid, mlxsw_sp_port->active_vlans); @@ -1039,8 +1035,8 @@ out: static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_vlan *vlan) { - return __mlxsw_sp_port_vlans_del(mlxsw_sp_port, - vlan->vid_begin, vlan->vid_end, false); + return __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vlan->vid_begin, + vlan->vid_end); } void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port) @@ -1048,7 +1044,7 @@ void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port) u16 vid; for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) - __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid, false); + __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid); } static int @@ -1546,32 +1542,6 @@ void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_fdb_fini(mlxsw_sp); } -int mlxsw_sp_port_vlan_init(struct mlxsw_sp_port *mlxsw_sp_port) -{ - struct net_device *dev = mlxsw_sp_port->dev; - int err; - - /* Allow only untagged packets to ingress and tag them internally - * with VID 1. - */ - mlxsw_sp_port->pvid = 1; - err = __mlxsw_sp_port_vlans_del(mlxsw_sp_port, 0, VLAN_N_VID - 1, - true); - if (err) { - netdev_err(dev, "Unable to init VLANs\n"); - return err; - } - - /* Add implicit VLAN interface in the device, so that untagged - * packets will be classified to the default vFID. - */ - err = mlxsw_sp_port_add_vid(dev, 0, 1); - if (err) - netdev_err(dev, "Failed to configure default vFID\n"); - - return err; -} - void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port) { mlxsw_sp_port->dev->switchdev_ops = &mlxsw_sp_port_switchdev_ops; -- cgit v0.10.2 From 2f25844c233650b2abb92b66b3d0af7d73b5f88f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Aug 2016 16:39:31 +0200 Subject: mlxsw: spectrum: Mark port as active before registering it Commit bbf2a4757b30 ("mlxsw: spectrum: Initialize ports at the end of init sequence") moved ports initialization to the end of the init sequence, which means ports are the first to be removed during fini. Since the FDB delayed work is still active when ports are removed it's possible for it to process FDB notifications of inactive ports, resulting in a warning message. Fix that by marking ports as inactive only after unregistering them. The NETDEV_UNREGISTER event will invoke bridge's driver port removal sequence that will cause the FDB (and FDB notifications) to be flushed. Fixes: bbf2a4757b30 ("mlxsw: spectrum: Initialize ports at the end of init sequence") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 0677f3f..12681db 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2183,6 +2183,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, } mlxsw_sp_port_switchdev_init(mlxsw_sp_port); + mlxsw_sp->ports[local_port] = mlxsw_sp_port; err = register_netdev(dev); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register netdev\n", @@ -2199,12 +2200,12 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_core_port_init; } - mlxsw_sp->ports[local_port] = mlxsw_sp_port; return 0; err_core_port_init: unregister_netdev(dev); err_register_netdev: + mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); err_port_pvid_vport_create: mlxsw_sp_port_dcb_fini(mlxsw_sp_port); @@ -2233,9 +2234,9 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) if (!mlxsw_sp_port) return; - mlxsw_sp->ports[local_port] = NULL; mlxsw_core_port_fini(&mlxsw_sp_port->core_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ + mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); mlxsw_sp_port_dcb_fini(mlxsw_sp_port); mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); -- cgit v0.10.2 From c20b80187a93b4fcc1c5c46fc8a436df1f17636d Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Wed, 17 Aug 2016 16:39:32 +0200 Subject: mlxsw: spectrum: Add missing packet traps Add the following traps: 1) MTU Error: Trap packets whose size is bigger than the egress RIF's MTU. If DF bit isn't set, traffic will continue to be routed in slow path. 2) TTL Error: Trap packets whose TTL expired. This allows traceroute to work properly. 3) OSPF packets. Fixes: 7b27ce7bb9cd ("mlxsw: spectrum: Add traps needed for router implementation") Signed-off-by: Elad Raz Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 12681db..6b69c8a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2651,6 +2651,21 @@ static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = { { .func = mlxsw_sp_rx_listener_func, .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_MTUERROR, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_TTLERROR, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_OSPF, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, .trap_id = MLXSW_TRAP_ID_IP2ME, }, { diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 470d769..9508e0a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -56,6 +56,9 @@ enum { MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34, MLXSW_TRAP_ID_ARPBC = 0x50, MLXSW_TRAP_ID_ARPUC = 0x51, + MLXSW_TRAP_ID_MTUERROR = 0x52, + MLXSW_TRAP_ID_TTLERROR = 0x53, + MLXSW_TRAP_ID_OSPF = 0x55, MLXSW_TRAP_ID_IP2ME = 0x5F, MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70, MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90, -- cgit v0.10.2 From a94a614fa2bd32848a67f8261228e193beb826ca Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Aug 2016 16:39:33 +0200 Subject: mlxsw: spectrum: Trap loop-backed packets One of the conditions to generate an ICMP Redirect Message is that "the packet is being forwarded out the same physical interface that it was received from" (RFC 1812). Therefore, we need to be able to trap such packets and let the kernel decide what to do with them. For each RIF, enable the loop-back filter, which will raise the LBERROR trap whenever the ingress RIF equals the egress RIF. Fixes: 99724c18fc66 ("mlxsw: spectrum: Introduce support for router interfaces") Reported-by: Ilan Tayari Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 7ca9201..a1bd36c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3383,6 +3383,15 @@ MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1); */ MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1); +/* reg_ritr_lb_en + * Loop-back filter enable for unicast packets. + * If the flag is set then loop-back filter for unicast packets is + * implemented on the RIF. Multicast packets are always subject to + * loop-back filtering. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, lb_en, 0x04, 24, 1); + /* reg_ritr_virtual_router * Virtual router ID associated with the router interface. * Access: RW @@ -3484,6 +3493,7 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, mlxsw_reg_ritr_op_set(payload, op); mlxsw_reg_ritr_rif_set(payload, rif); mlxsw_reg_ritr_ipv4_fe_set(payload, 1); + mlxsw_reg_ritr_lb_en_set(payload, 1); mlxsw_reg_ritr_mtu_set(payload, mtu); mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 6b69c8a..8137daa 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2661,6 +2661,11 @@ static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = { { .func = mlxsw_sp_rx_listener_func, .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_LBERROR, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, .trap_id = MLXSW_TRAP_ID_OSPF, }, { diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 9508e0a..ed8e301 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -58,6 +58,7 @@ enum { MLXSW_TRAP_ID_ARPUC = 0x51, MLXSW_TRAP_ID_MTUERROR = 0x52, MLXSW_TRAP_ID_TTLERROR = 0x53, + MLXSW_TRAP_ID_LBERROR = 0x54, MLXSW_TRAP_ID_OSPF = 0x55, MLXSW_TRAP_ID_IP2ME = 0x5F, MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70, -- cgit v0.10.2 From 0e7df1a290abbcf3ecf697bbbbd4549c9a113db0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 17 Aug 2016 16:39:34 +0200 Subject: mlxsw: reg: Fix missing op field fill-up Ralue pack function needs to set op, otherwise it is 0 for add always. Fixes: d5a1c749d22 ("mlxsw: reg: Add Router Algorithmic LPM Unicast Entry Register definition") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index a1bd36c..1721098 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4010,6 +4010,7 @@ static inline void mlxsw_reg_ralue_pack(char *payload, { MLXSW_REG_ZERO(ralue, payload); mlxsw_reg_ralue_protocol_set(payload, protocol); + mlxsw_reg_ralue_op_set(payload, op); mlxsw_reg_ralue_virtual_router_set(payload, virtual_router); mlxsw_reg_ralue_prefix_len_set(payload, prefix_len); mlxsw_reg_ralue_entry_type_set(payload, -- cgit v0.10.2 From 0583272d91f0f4e21f1eb666786286863185be7e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Aug 2016 16:39:35 +0200 Subject: mlxsw: spectrum: Add missing rollbacks in error path While going over the code I noticed we are missing two rollbacks in the port's creation error path. Add them and adjust the place of one of them in the port's removal sequence so that both are symmetric. Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 8137daa..1fe9fbd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2206,6 +2206,7 @@ err_core_port_init: unregister_netdev(dev); err_register_netdev: mlxsw_sp->ports[local_port] = NULL; + mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); err_port_pvid_vport_create: mlxsw_sp_port_dcb_fini(mlxsw_sp_port); @@ -2215,6 +2216,7 @@ err_port_buffers_init: err_port_admin_status_set: err_port_mtu_set: err_port_speed_by_width_set: + mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); err_port_swid_set: err_port_system_port_mapping_set: err_dev_addr_init: @@ -2237,9 +2239,9 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_core_port_fini(&mlxsw_sp_port->core_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp->ports[local_port] = NULL; + mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); mlxsw_sp_port_dcb_fini(mlxsw_sp_port); - mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port); free_percpu(mlxsw_sp_port->pcpu_stats); -- cgit v0.10.2 From 8168287b5dfac9227a549ed87f5e111b7005e8a4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Aug 2016 16:39:36 +0200 Subject: mlxsw: spectrum: Unmap 802.1Q FID before destroying it Before destroying the 802.1Q FID we should first remove the VID-to-FID mapping. This makes mlxsw_sp_fid_destroy() symmetric with regards to mlxsw_sp_fid_create(). Fixes: 14d39461b3f4 ("mlxsw: spectrum: Use per-FID struct for the VLAN-aware bridge") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index b5e864d..d1b59cd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -450,6 +450,8 @@ void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f) kfree(f); + mlxsw_sp_fid_map(mlxsw_sp, fid, false); + mlxsw_sp_fid_op(mlxsw_sp, fid, false); } -- cgit v0.10.2 From 9ffcc3725f096e9f0d985f738b0e44214cd72d93 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Aug 2016 16:39:37 +0200 Subject: mlxsw: spectrum: Allow packets to be trapped from any PG When packets enter the device they are classified to a priority group (PG) buffer based on their PCP value. After their egress port and traffic class are determined they are moved to the switch's shared buffer and await transmission, if: (Ingress{Port}.Usage < Thres && Ingress{Port,PG}.Usage < Thres && Egress{Port}.Usage < Thres && Egress{Port,TC}.Usage < Thres) || (Ingress{Port}.Usage < Min || Ingress{Port,PG} < Min || Egress{Port}.Usage < Min || Egress{Port,TC}.Usage < Min) Packets scheduled to transmission through CPU port (trapped to CPU) use traffic class 7, which has a zero maximum and minimum quotas. However, when such packets arrive from PG 0 they are admitted to the shared buffer as PG 0 has a non-zero minimum quota. Allow all packets to be trapped to the CPU - regardless of the PG they were classified to - by assigning a 10KB minimum quota for CPU port and TC7. Fixes: 8e8dfe9fdf06 ("mlxsw: spectrum: Add IEEE 802.1Qaz ETS support") Reported-by: Tamir Winetroub Tested-by: Tamir Winetroub Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 074cdda..237418a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -330,7 +330,7 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 0, 0), MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, -- cgit v0.10.2 From f07fed82ad7994cc4d779ee79bdf7a46848c4b8f Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sat, 13 Aug 2016 22:34:56 -0700 Subject: net_sched: remove the leftover cleanup_a() After refactoring tc_action into tcf_common, we no longer need to cleanup temporary "actions" in list, they are permanently stored in the hashtable. Fixes: a85a970af265 ("net_sched: move tc_action into tcf_common") Reported-by: Jamal Hadi Salim Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller diff --git a/net/sched/act_api.c b/net/sched/act_api.c index e4a5f26..cce6986 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -754,16 +754,6 @@ err_out: return ERR_PTR(err); } -static void cleanup_a(struct list_head *actions) -{ - struct tc_action *a, *tmp; - - list_for_each_entry_safe(a, tmp, actions, list) { - list_del(&a->list); - kfree(a); - } -} - static int tca_action_flush(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid) { @@ -905,7 +895,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, return ret; } err: - cleanup_a(&actions); + tcf_action_destroy(&actions, 0); return ret; } @@ -942,15 +932,9 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions); if (ret) - goto done; + return ret; - /* dump then free all the actions after update; inserted policy - * stays intact - */ - ret = tcf_add_notify(net, n, &actions, portid); - cleanup_a(&actions); -done: - return ret; + return tcf_add_notify(net, n, &actions, portid); } static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n) -- cgit v0.10.2 From 824a7e8863b3eb283343f891b11a782b4ec0d0de Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sat, 13 Aug 2016 22:34:57 -0700 Subject: net_sched: remove an unnecessary list_del() This list_del() for tc action is not needed actually, because we only use this list to chain bulk operations, therefore should not be carried for latter operations. Fixes: ec0595cc4495 ("net_sched: get rid of struct tcf_common") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller diff --git a/net/sched/act_api.c b/net/sched/act_api.c index cce6986..b4c7be3 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -64,7 +64,6 @@ int __tcf_hash_release(struct tc_action *p, bool bind, bool strict) if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) { if (p->ops->cleanup) p->ops->cleanup(p, bind); - list_del(&p->list); tcf_hash_destroy(p->hinfo, p); ret = ACT_P_DELETED; } -- cgit v0.10.2 From 0c23c3e705691cfb99c94f2760df2b456fe45194 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sat, 13 Aug 2016 22:34:58 -0700 Subject: net_sched: fix a typo in tc_for_each_action() It is harmless because all users pass 'a' to this macro. Fixes: 00175aec941e ("net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef") Cc: Amir Vadai Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller diff --git a/include/net/act_api.h b/include/net/act_api.h index 41e6a24..f53ee9d 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -193,7 +193,7 @@ int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); (list_empty(&(_exts)->actions)) #define tc_for_each_action(_a, _exts) \ - list_for_each_entry(a, &(_exts)->actions, list) + list_for_each_entry(_a, &(_exts)->actions, list) #define tc_single_action(_exts) \ (list_is_singular(&(_exts)->actions)) -- cgit v0.10.2 From 2734437ef3c2943090d0914bf91caa6b30451615 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sat, 13 Aug 2016 22:34:59 -0700 Subject: net_sched: move tc offload macros to pkt_cls.h struct tcf_exts belongs to filters, should not be visible to plain tc actions. Cc: Ido Schimmel Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller diff --git a/include/net/act_api.h b/include/net/act_api.h index f53ee9d..870332f 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -189,30 +189,17 @@ int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); -#define tc_no_actions(_exts) \ - (list_empty(&(_exts)->actions)) - -#define tc_for_each_action(_a, _exts) \ - list_for_each_entry(_a, &(_exts)->actions, list) - -#define tc_single_action(_exts) \ - (list_is_singular(&(_exts)->actions)) +#endif /* CONFIG_NET_CLS_ACT */ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 lastuse) { +#ifdef CONFIG_NET_CLS_ACT if (!a->ops->stats_update) return; a->ops->stats_update(a, bytes, packets, lastuse); +#endif } -#else /* CONFIG_NET_CLS_ACT */ - -#define tc_no_actions(_exts) true -#define tc_for_each_action(_a, _exts) while ((void)(_a), 0) -#define tc_single_action(_exts) false -#define tcf_action_stats_update(a, bytes, packets, lastuse) - -#endif /* CONFIG_NET_CLS_ACT */ #endif diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 6f8d653..00dd5c4 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -130,6 +130,25 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, return 0; } +#ifdef CONFIG_NET_CLS_ACT + +#define tc_no_actions(_exts) \ + (list_empty(&(_exts)->actions)) + +#define tc_for_each_action(_a, _exts) \ + list_for_each_entry(_a, &(_exts)->actions, list) + +#define tc_single_action(_exts) \ + (list_is_singular(&(_exts)->actions)) + +#else /* CONFIG_NET_CLS_ACT */ + +#define tc_no_actions(_exts) true +#define tc_for_each_action(_a, _exts) while ((void)(_a), 0) +#define tc_single_action(_exts) false + +#endif /* CONFIG_NET_CLS_ACT */ + int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr); -- cgit v0.10.2 From 22dc13c837c33207548c8ee5116b64e2930a6e23 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sat, 13 Aug 2016 22:35:00 -0700 Subject: net_sched: convert tcf_exts from list to pointer array As pointed out by Jamal, an action could be shared by multiple filters, so we can't use list to chain them any more after we get rid of the original tc_action. Instead, we could just save pointers to these actions in tcf_exts, since they are refcount'ed, so convert the list to an array of pointers. The "ugly" part is the action API still accepts list as a parameter, I just introduce a helper function to convert the array of pointers to a list, instead of relying on the C99 feature to iterate the array. Fixes: a85a970af265 ("net_sched: move tc_action into tcf_common") Reported-by: Jamal Hadi Salim Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ee57a89..b4f0374 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8396,12 +8396,14 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter, struct tcf_exts *exts, u64 *action, u8 *queue) { const struct tc_action *a; + LIST_HEAD(actions); int err; if (tc_no_actions(exts)) return -EINVAL; - tc_for_each_action(a, exts) { + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { /* Drop action */ if (is_tcf_gact_shot(a)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 0f19b01..dc8b1cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -318,6 +318,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, u32 *action, u32 *flow_tag) { const struct tc_action *a; + LIST_HEAD(actions); if (tc_no_actions(exts)) return -EINVAL; @@ -325,7 +326,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, *flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; *action = 0; - tc_for_each_action(a, exts) { + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { /* Only support a single action per rule */ if (*action) return -EINVAL; @@ -362,13 +364,15 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, u32 *action, u32 *dest_vport) { const struct tc_action *a; + LIST_HEAD(actions); if (tc_no_actions(exts)) return -EINVAL; *action = 0; - tc_for_each_action(a, exts) { + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { /* Only support a single action per rule */ if (*action) return -EINVAL; @@ -503,6 +507,7 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow; struct tc_action *a; struct mlx5_fc *counter; + LIST_HEAD(actions); u64 bytes; u64 packets; u64 lastuse; @@ -518,7 +523,8 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); - tc_for_each_action(a, f->exts) + tcf_exts_to_list(f->exts, &actions); + list_for_each_entry(a, &actions, list) tcf_action_stats_update(a, bytes, packets, lastuse); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 1fe9fbd..1f81689 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1121,6 +1121,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress) { const struct tc_action *a; + LIST_HEAD(actions); int err; if (!tc_single_action(cls->exts)) { @@ -1128,7 +1129,8 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, return -ENOTSUPP; } - tc_for_each_action(a, cls->exts) { + tcf_exts_to_list(cls->exts, &actions); + list_for_each_entry(a, &actions, list) { if (!is_tcf_mirred_mirror(a) || protocol != htons(ETH_P_ALL)) return -ENOTSUPP; diff --git a/include/net/act_api.h b/include/net/act_api.h index 870332f..82f3c91 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -176,8 +176,8 @@ int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops); int tcf_unregister_action(struct tc_action_ops *a, struct pernet_operations *ops); int tcf_action_destroy(struct list_head *actions, int bind); -int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, - struct tcf_result *res); +int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, + int nr_actions, struct tcf_result *res); int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, char *n, int ovr, int bind, struct list_head *); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 00dd5c4..c99508d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -59,7 +59,8 @@ tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r) struct tcf_exts { #ifdef CONFIG_NET_CLS_ACT __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ - struct list_head actions; + int nr_actions; + struct tc_action **actions; #endif /* Map to export classifier specific extension TLV types to the * generic extensions API. Unsupported extensions must be set to 0. @@ -72,7 +73,10 @@ static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police) { #ifdef CONFIG_NET_CLS_ACT exts->type = 0; - INIT_LIST_HEAD(&exts->actions); + exts->nr_actions = 0; + exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), + GFP_KERNEL); + WARN_ON(!exts->actions); /* TODO: propagate the error to callers */ #endif exts->action = action; exts->police = police; @@ -89,7 +93,7 @@ static inline int tcf_exts_is_predicative(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT - return !list_empty(&exts->actions); + return exts->nr_actions; #else return 0; #endif @@ -108,6 +112,20 @@ tcf_exts_is_available(struct tcf_exts *exts) return tcf_exts_is_predicative(exts); } +static inline void tcf_exts_to_list(const struct tcf_exts *exts, + struct list_head *actions) +{ +#ifdef CONFIG_NET_CLS_ACT + int i; + + for (i = 0; i < exts->nr_actions; i++) { + struct tc_action *a = exts->actions[i]; + + list_add(&a->list, actions); + } +#endif +} + /** * tcf_exts_exec - execute tc filter extensions * @skb: socket buffer @@ -124,27 +142,21 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, struct tcf_result *res) { #ifdef CONFIG_NET_CLS_ACT - if (!list_empty(&exts->actions)) - return tcf_action_exec(skb, &exts->actions, res); + if (exts->nr_actions) + return tcf_action_exec(skb, exts->actions, exts->nr_actions, + res); #endif return 0; } #ifdef CONFIG_NET_CLS_ACT -#define tc_no_actions(_exts) \ - (list_empty(&(_exts)->actions)) - -#define tc_for_each_action(_a, _exts) \ - list_for_each_entry(_a, &(_exts)->actions, list) - -#define tc_single_action(_exts) \ - (list_is_singular(&(_exts)->actions)) +#define tc_no_actions(_exts) ((_exts)->nr_actions == 0) +#define tc_single_action(_exts) ((_exts)->nr_actions == 1) #else /* CONFIG_NET_CLS_ACT */ #define tc_no_actions(_exts) true -#define tc_for_each_action(_a, _exts) while ((void)(_a), 0) #define tc_single_action(_exts) false #endif /* CONFIG_NET_CLS_ACT */ diff --git a/net/sched/act_api.c b/net/sched/act_api.c index b4c7be3..d09d068 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -420,18 +420,19 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind) return res; } -int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, - struct tcf_result *res) +int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, + int nr_actions, struct tcf_result *res) { - const struct tc_action *a; - int ret = -1; + int ret = -1, i; if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); ret = TC_ACT_OK; goto exec_done; } - list_for_each_entry(a, actions, list) { + for (i = 0; i < nr_actions; i++) { + const struct tc_action *a = actions[i]; + repeat: ret = a->ops->act(skb, a, res); if (ret == TC_ACT_REPEAT) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 843a716..a7c5645 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -541,8 +541,12 @@ out: void tcf_exts_destroy(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT - tcf_action_destroy(&exts->actions, TCA_ACT_UNBIND); - INIT_LIST_HEAD(&exts->actions); + LIST_HEAD(actions); + + tcf_exts_to_list(exts, &actions); + tcf_action_destroy(&actions, TCA_ACT_UNBIND); + kfree(exts->actions); + exts->nr_actions = 0; #endif } EXPORT_SYMBOL(tcf_exts_destroy); @@ -554,7 +558,6 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, { struct tc_action *act; - INIT_LIST_HEAD(&exts->actions); if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tb[exts->police], rate_tlv, "police", ovr, @@ -563,14 +566,20 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, return PTR_ERR(act); act->type = exts->type = TCA_OLD_COMPAT; - list_add(&act->list, &exts->actions); + exts->actions[0] = act; + exts->nr_actions = 1; } else if (exts->action && tb[exts->action]) { - int err; + LIST_HEAD(actions); + int err, i = 0; + err = tcf_action_init(net, tb[exts->action], rate_tlv, NULL, ovr, - TCA_ACT_BIND, &exts->actions); + TCA_ACT_BIND, &actions); if (err) return err; + list_for_each_entry(act, &actions, list) + exts->actions[i++] = act; + exts->nr_actions = i; } } #else @@ -587,37 +596,49 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, struct tcf_exts *src) { #ifdef CONFIG_NET_CLS_ACT - LIST_HEAD(tmp); + struct tcf_exts old = *dst; + tcf_tree_lock(tp); - list_splice_init(&dst->actions, &tmp); - list_splice(&src->actions, &dst->actions); + dst->nr_actions = src->nr_actions; + dst->actions = src->actions; dst->type = src->type; tcf_tree_unlock(tp); - tcf_action_destroy(&tmp, TCA_ACT_UNBIND); + + tcf_exts_destroy(&old); #endif } EXPORT_SYMBOL(tcf_exts_change); -#define tcf_exts_first_act(ext) \ - list_first_entry_or_null(&(exts)->actions, \ - struct tc_action, list) +#ifdef CONFIG_NET_CLS_ACT +static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts) +{ + if (exts->nr_actions == 0) + return NULL; + else + return exts->actions[0]; +} +#endif int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT struct nlattr *nest; - if (exts->action && !list_empty(&exts->actions)) { + if (exts->action && exts->nr_actions) { /* * again for backward compatible mode - we want * to work with both old and new modes of entering * tc data even if iproute2 was newer - jhs */ if (exts->type != TCA_OLD_COMPAT) { + LIST_HEAD(actions); + nest = nla_nest_start(skb, exts->action); if (nest == NULL) goto nla_put_failure; - if (tcf_action_dump(skb, &exts->actions, 0, 0) < 0) + + tcf_exts_to_list(exts, &actions); + if (tcf_action_dump(skb, &actions, 0, 0) < 0) goto nla_put_failure; nla_nest_end(skb, nest); } else if (exts->police) { -- cgit v0.10.2 From 0852e455238f8550fa92b1e40355eb2c6805787e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sat, 13 Aug 2016 22:35:01 -0700 Subject: net_sched: unify the init logic for act_police Jamal reported a crash when we create a police action with a specific index, this is because the init logic is not correct, we should always create one for this case. Just unify the logic with other tc actions. Fixes: a03e6fe56971 ("act_police: fix a crash during removal") Reported-by: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller diff --git a/net/sched/act_police.c b/net/sched/act_police.c index b3c7e97..259352d 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -125,6 +125,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; struct tc_action_net *tn = net_generic(net, police_net_id); + bool exists = false; int size; if (nla == NULL) @@ -139,24 +140,24 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, size = nla_len(tb[TCA_POLICE_TBF]); if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat)) return -EINVAL; + parm = nla_data(tb[TCA_POLICE_TBF]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; - if (parm->index) { - if (tcf_hash_check(tn, parm->index, a, bind)) { - if (ovr) - goto override; - /* not replacing */ - return -EEXIST; - } - } else { + if (!exists) { ret = tcf_hash_create(tn, parm->index, NULL, a, &act_police_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; } -override: police = to_police(*a); if (parm->rate.rate) { err = -ENOMEM; -- cgit v0.10.2 From b5ac851885accffe0485aea2805df8f2d49c95a8 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Sat, 13 Aug 2016 22:35:02 -0700 Subject: net_sched: allow flushing tc police actions The act_police uses its own code to walk the action hashtable, which leads to that we could not flush standalone tc police actions, so just switch to tcf_generic_walker() like other actions. (Joint work from Roman and Cong.) Signed-off-by: Roman Mashak Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 259352d..8a3be1d 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -63,49 +63,8 @@ static int tcf_act_police_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops) { struct tc_action_net *tn = net_generic(net, police_net_id); - struct tcf_hashinfo *hinfo = tn->hinfo; - int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; - struct nlattr *nest; - - spin_lock_bh(&hinfo->lock); - - s_i = cb->args[0]; - - for (i = 0; i < (POL_TAB_MASK + 1); i++) { - struct hlist_head *head; - struct tc_action *p; - - head = &hinfo->htab[tcf_hash(i, POL_TAB_MASK)]; - - hlist_for_each_entry_rcu(p, head, tcfa_head) { - index++; - if (index < s_i) - continue; - nest = nla_nest_start(skb, index); - if (nest == NULL) - goto nla_put_failure; - if (type == RTM_DELACTION) - err = tcf_action_dump_1(skb, p, 0, 1); - else - err = tcf_action_dump_1(skb, p, 0, 0); - if (err < 0) { - index--; - nla_nest_cancel(skb, nest); - goto done; - } - nla_nest_end(skb, nest); - n_i++; - } - } -done: - spin_unlock_bh(&hinfo->lock); - if (n_i) - cb->args[0] += n_i; - return n_i; -nla_put_failure: - nla_nest_cancel(skb, nest); - goto done; + return tcf_generic_walker(tn, skb, cb, type, ops); } static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { -- cgit v0.10.2 From b825b44c4ef4dabfdaf4e82db2263d377ac45d67 Mon Sep 17 00:00:00 2001 From: Jay Freyensee Date: Wed, 17 Aug 2016 15:00:25 -0700 Subject: nvmet-rdma: +1 to *queue_size from hsqsize/hrqsize The host will be sending sqsize 0-based hsqsize value, the target need to be adjusted as well. Signed-off-by: Jay Freyensee Reviewed-by: Sagi Grimberg Signed-off-by: Sagi Grimberg diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 5de8d0a..1cbe6e0 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1004,10 +1004,10 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn, queue->host_qid = le16_to_cpu(req->qid); /* - * req->hsqsize corresponds to our recv queue size + * req->hsqsize corresponds to our recv queue size plus 1 * req->hrqsize corresponds to our send queue size */ - queue->recv_queue_size = le16_to_cpu(req->hsqsize); + queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1; queue->send_queue_size = le16_to_cpu(req->hrqsize); if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH) -- cgit v0.10.2 From f994d9dc28bc27353acde2caaf718222d92a3e24 Mon Sep 17 00:00:00 2001 From: Jay Freyensee Date: Wed, 17 Aug 2016 15:00:26 -0700 Subject: fabrics: define admin sqsize min default, per spec Upon admin queue connect(), the rdma qp was being set based on NVMF_AQ_DEPTH. However, the fabrics layer was using the sqsize field value set for I/O queues for the admin queue, which threw the nvme layer and rdma layer off-whack: root@fedora23-fabrics-host1 nvmf]# dmesg [ 3507.798642] nvme_fabrics: nvmf_connect_admin_queue():admin sqsize being sent is: 128 [ 3507.798858] nvme nvme0: creating 16 I/O queues. [ 3507.896407] nvme nvme0: new ctrl: NQN "nullside-nqn", addr 192.168.1.3:4420 Thus, to have a different admin queue value, we use NVMF_AQ_DEPTH for connect() and RDMA private data as the minimum depth specified in the NVMe-over-Fabrics 1.0 spec (and in that RDMA private data we treat hrqsize as 1's-based value, per current understanding of the fabrics spec). Reported-by: Daniel Verkamp Signed-off-by: Jay Freyensee Reviewed-by: Daniel Verkamp Signed-off-by: Sagi Grimberg diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index dc99676..020302c 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -363,7 +363,14 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) cmd.connect.opcode = nvme_fabrics_command; cmd.connect.fctype = nvme_fabrics_type_connect; cmd.connect.qid = 0; - cmd.connect.sqsize = cpu_to_le16(ctrl->sqsize); + + /* + * fabrics spec sets a minimum of depth 32 for admin queue, + * so set the queue with this depth always until + * justification otherwise. + */ + cmd.connect.sqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1); + /* * Set keep-alive timeout in seconds granularity (ms * 1000) * and add a grace period for controller kato enforcement diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 9c69393..d44809e 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1278,8 +1278,17 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue) priv.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0); priv.qid = cpu_to_le16(nvme_rdma_queue_idx(queue)); - priv.hrqsize = cpu_to_le16(queue->queue_size); - priv.hsqsize = cpu_to_le16(queue->queue_size); + /* + * set the admin queue depth to the minimum size + * specified by the Fabrics standard. + */ + if (priv.qid == 0) { + priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH); + priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1); + } else { + priv.hrqsize = cpu_to_le16(queue->queue_size); + priv.hsqsize = cpu_to_le16(queue->queue_size); + } ret = rdma_connect(queue->cm_id, ¶m); if (ret) { -- cgit v0.10.2 From c5af8654c422cfdd8480be3a244748e18cace6c5 Mon Sep 17 00:00:00 2001 From: Jay Freyensee Date: Wed, 17 Aug 2016 15:00:27 -0700 Subject: nvme-rdma: fix sqsize/hsqsize per spec Per NVMe-over-Fabrics 1.0 spec, sqsize is represented as a 0-based value. Also per spec, the RDMA binding values shall be set to sqsize, which makes hsqsize 0-based values. Thus, the sqsize during NVMf connect() is now: [root@fedora23-fabrics-host1 for-48]# dmesg [ 318.720645] nvme_fabrics: nvmf_connect_admin_queue(): sqsize for admin queue: 31 [ 318.720884] nvme nvme0: creating 16 I/O queues. [ 318.810114] nvme_fabrics: nvmf_connect_io_queue(): sqsize for i/o queue: 127 Finally, current interpretation implies hrqsize is 1's based so set it appropriately. Reported-by: Daniel Verkamp Signed-off-by: Jay Freyensee Signed-off-by: Sagi Grimberg diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d44809e..c133256 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -645,7 +645,8 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) int i, ret; for (i = 1; i < ctrl->queue_count; i++) { - ret = nvme_rdma_init_queue(ctrl, i, ctrl->ctrl.sqsize); + ret = nvme_rdma_init_queue(ctrl, i, + ctrl->ctrl.opts->queue_size); if (ret) { dev_info(ctrl->ctrl.device, "failed to initialize i/o queue: %d\n", ret); @@ -1286,8 +1287,13 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue) priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH); priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1); } else { + /* + * current interpretation of the fabrics spec + * is at minimum you make hrqsize sqsize+1, or a + * 1's based representation of sqsize. + */ priv.hrqsize = cpu_to_le16(queue->queue_size); - priv.hsqsize = cpu_to_le16(queue->queue_size); + priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize); } ret = rdma_connect(queue->cm_id, ¶m); @@ -1825,7 +1831,7 @@ static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl) memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); ctrl->tag_set.ops = &nvme_rdma_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.sqsize; + ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; ctrl->tag_set.reserved_tags = 1; /* fabric connect */ ctrl->tag_set.numa_node = NUMA_NO_NODE; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; @@ -1923,7 +1929,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, spin_lock_init(&ctrl->lock); ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */ - ctrl->ctrl.sqsize = opts->queue_size; + ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; ret = -ENOMEM; -- cgit v0.10.2 From eadb7cf44105ae8250f0d638dc880c3ed511c4e2 Mon Sep 17 00:00:00 2001 From: Jay Freyensee Date: Wed, 17 Aug 2016 15:00:28 -0700 Subject: nvme-loop: set sqsize to 0-based value, per spec Signed-off-by: Jay Freyensee Signed-off-by: Sagi Grimberg diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 7affd40..395e60d 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -556,7 +556,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); ctrl->tag_set.ops = &nvme_loop_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.sqsize; + ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; ctrl->tag_set.reserved_tags = 1; /* fabric connect */ ctrl->tag_set.numa_node = NUMA_NO_NODE; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; @@ -620,7 +620,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, ret = -ENOMEM; - ctrl->ctrl.sqsize = opts->queue_size; + ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; ctrl->queues = kcalloc(opts->nr_io_queues + 1, sizeof(*ctrl->queues), -- cgit v0.10.2 From 88b2f634028f1f38dcc3d412e10ff1f224976daa Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 17 Aug 2016 13:33:14 +0200 Subject: x86/microcode/AMD: Fix initrd loading with CONFIG_RANDOMIZE_MEMORY=y Similar to: efaad554b4ff ("x86/microcode/intel: Fix initrd loading with CONFIG_RANDOMIZE_MEMORY=y") ... fix microcode loading from the initrd on AMD by adding the randomization offset to the microcode patch container within the initrd. Reported-and-tested-by: Brian Gerst Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-tip-commits@vger.kernel.org Link: http://lkml.kernel.org/r/20160817113314.GA19221@nazgul.tnic Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 27a0228..b816971 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -355,6 +355,7 @@ void load_ucode_amd_ap(void) unsigned int cpu = smp_processor_id(); struct equiv_cpu_entry *eq; struct microcode_amd *mc; + u8 *cont = container; u32 rev, eax; u16 eq_id; @@ -371,8 +372,11 @@ void load_ucode_amd_ap(void) if (check_current_patch_level(&rev, false)) return; + /* Add CONFIG_RANDOMIZE_MEMORY offset. */ + cont += PAGE_OFFSET - __PAGE_OFFSET_BASE; + eax = cpuid_eax(0x00000001); - eq = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ); + eq = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ); eq_id = find_equiv_id(eq, eax); if (!eq_id) @@ -434,6 +438,9 @@ int __init save_microcode_in_initrd_amd(void) else container = cont_va; + /* Add CONFIG_RANDOMIZE_MEMORY offset. */ + container += PAGE_OFFSET - __PAGE_OFFSET_BASE; + eax = cpuid_eax(0x00000001); eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); -- cgit v0.10.2 From 7b0501b1e7cddd32b265178e32d332bdfbb532d4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 15 Aug 2016 12:17:00 +0200 Subject: x86/smp: Fix __max_logical_packages value setup Frank reported kernel panic when he disabled several cores in BIOS via following option: Core Disable Bitmap(Hex) [0] with number 0xFFE, which leaves 16 CPUs in system (out of 48). The kernel panic below goes along with following messages: smpboot: Max logical packages: 2^M smpboot: APIC(0) Converting physical 0 to logical package 0^M smpboot: APIC(20) Converting physical 1 to logical package 1^M smpboot: APIC(40) Package 2 exceeds logical package map^M smpboot: CPU 8 APICId 40 disabled^M smpboot: APIC(60) Package 3 exceeds logical package map^M smpboot: CPU 12 APICId 60 disabled^M ... general protection fault: 0000 [#1] SMP^M Modules linked in:^M CPU: 15 PID: 1 Comm: swapper/0 Not tainted 4.7.0-rc5+ #1^M Hardware name: SGI UV300/UV300, BIOS SGI UV 300 series BIOS 05/25/2016^M task: ffff8801673e0000 ti: ffff8801673ac000 task.ti: ffff8801673ac000^M RIP: 0010:[] [] uncore_change_context+0xd4/0x180^M ... [] uncore_event_init_cpu+0x6c/0x70^M [] intel_uncore_init+0x1c2/0x2dd^M [] ? uncore_cpu_setup+0x17/0x17^M [] do_one_initcall+0x50/0x190^M [] ? parse_args+0x293/0x480^M [] kernel_init_freeable+0x1a5/0x249^M [] ? set_debug_rodata+0x12/0x12^M [] kernel_init+0xe/0x110^M [] ret_from_fork+0x1f/0x40^M [] ? rest_init+0x80/0x80^M The reason for the panic is wrong value of __max_logical_packages, which lets logical_package_map uninitialized and the uncore code relying on this map being properly initialized (maybe we should add some safety checks there as well). The __max_logical_packages is computed as: DIV_ROUND_UP(total_cpus, ncpus); - ncpus being number of cores With above BIOS setup we get total_cpus == 16 which set __max_logical_packages to 2 (ncpus is 12). Once topology_update_package_map processes CPU with logical pkg over 2 we display above messages and fail to initialize the physical_to_logical_pkg map, which makes the uncore code crash. The fix is to remove logical_package_map bitmap completely and keep and update the logical_packages number instead. After we enumerate all the present CPUs, we check if the enumerated logical packages count is within its computed maximum from BIOS data. If it's not the case, we set this maximum to the new enumerated value and freeze any new addition of logical packages. The freeze is because lot of init code like uncore/rapl/cqm depends on having maximum logical package value set to allocate their data, so we can't change it later on. Prarit Bhargava tested the patch and confirms that it solves the problem: From dmidecode: Core Count: 24 Core Enabled: 24 Thread Count: 48 Orig kernel boot log: [ 0.464981] smpboot: Max logical packages: 19 [ 0.469861] smpboot: APIC(0) Converting physical 0 to logical package 0 [ 0.477261] smpboot: APIC(40) Converting physical 1 to logical package 1 [ 0.484760] smpboot: APIC(80) Converting physical 2 to logical package 2 [ 0.492258] smpboot: APIC(c0) Converting physical 3 to logical package 3 1. nr_cpus=8, should stop enumerating in package 0: [ 0.533664] smpboot: APIC(0) Converting physical 0 to logical package 0 [ 0.539596] smpboot: Max logical packages: 19 2. max_cpus=8, should still enumerate all packages: [ 0.526494] smpboot: APIC(0) Converting physical 0 to logical package 0 [ 0.532428] smpboot: APIC(40) Converting physical 1 to logical package 1 [ 0.538456] smpboot: APIC(80) Converting physical 2 to logical package 2 [ 0.544486] smpboot: APIC(c0) Converting physical 3 to logical package 3 [ 0.550524] smpboot: Max logical packages: 19 3. nr_cpus=49 ( 2 socket + 1 core on 3rd socket), should stop enumerating in package 2: [ 0.521378] smpboot: APIC(0) Converting physical 0 to logical package 0 [ 0.527314] smpboot: APIC(40) Converting physical 1 to logical package 1 [ 0.533345] smpboot: APIC(80) Converting physical 2 to logical package 2 [ 0.539368] smpboot: Max logical packages: 19 4. maxcpus=49, should still enumerate all packages: [ 0.525591] smpboot: APIC(0) Converting physical 0 to logical package 0 [ 0.531525] smpboot: APIC(40) Converting physical 1 to logical package 1 [ 0.537547] smpboot: APIC(80) Converting physical 2 to logical package 2 [ 0.543579] smpboot: APIC(c0) Converting physical 3 to logical package 3 [ 0.549624] smpboot: Max logical packages: 19 5. kdump (nr_cpus=1) works as well. Reported-by: Frank Ramsay Tested-by: Prarit Bhargava Signed-off-by: Jiri Olsa Reviewed-by: Prarit Bhargava Acked-by: Peter Zijlstra Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160815101700.GA30090@krava Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2a6e84a..4296beb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -100,10 +100,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); /* Logical package management. We might want to allocate that dynamically */ static int *physical_to_logical_pkg __read_mostly; static unsigned long *physical_package_map __read_mostly;; -static unsigned long *logical_package_map __read_mostly; static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); +static unsigned int logical_packages __read_mostly; +static bool logical_packages_frozen __read_mostly; /* Maximum number of SMT threads on any online core */ int __max_smt_threads __read_mostly; @@ -277,14 +278,14 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu) if (test_and_set_bit(pkg, physical_package_map)) goto found; - new = find_first_zero_bit(logical_package_map, __max_logical_packages); - if (new >= __max_logical_packages) { + if (logical_packages_frozen) { physical_to_logical_pkg[pkg] = -1; - pr_warn("APIC(%x) Package %u exceeds logical package map\n", + pr_warn("APIC(%x) Package %u exceeds logical package max\n", apicid, pkg); return -ENOSPC; } - set_bit(new, logical_package_map); + + new = logical_packages++; pr_info("APIC(%x) Converting physical %u to logical package %u\n", apicid, pkg, new); physical_to_logical_pkg[pkg] = new; @@ -341,6 +342,7 @@ static void __init smp_init_package_map(void) } __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); + logical_packages = 0; /* * Possibly larger than what we need as the number of apic ids per @@ -352,10 +354,6 @@ static void __init smp_init_package_map(void) memset(physical_to_logical_pkg, 0xff, size); size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); physical_package_map = kzalloc(size, GFP_KERNEL); - size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long); - logical_package_map = kzalloc(size, GFP_KERNEL); - - pr_info("Max logical packages: %u\n", __max_logical_packages); for_each_present_cpu(cpu) { unsigned int apicid = apic->cpu_present_to_apicid(cpu); @@ -369,6 +367,15 @@ static void __init smp_init_package_map(void) set_cpu_possible(cpu, false); set_cpu_present(cpu, false); } + + if (logical_packages > __max_logical_packages) { + pr_warn("Detected more packages (%u), then computed by BIOS data (%u).\n", + logical_packages, __max_logical_packages); + logical_packages_frozen = true; + __max_logical_packages = logical_packages; + } + + pr_info("Max logical packages: %u\n", __max_logical_packages); } void __init smp_store_boot_cpu_info(void) -- cgit v0.10.2 From 173be9a14f7b2e901cf77c18b1aafd4d672e9d9e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Aug 2016 18:38:42 +0200 Subject: sched/cputime: Fix NO_HZ_FULL getrusage() monotonicity regression Mike reports: Roughly 10% of the time, ltp testcase getrusage04 fails: getrusage04 0 TINFO : Expected timers granularity is 4000 us getrusage04 0 TINFO : Using 1 as multiply factor for max [us]time increment (1000+4000us)! getrusage04 0 TINFO : utime: 0us; stime: 179us getrusage04 0 TINFO : utime: 3751us; stime: 0us getrusage04 1 TFAIL : getrusage04.c:133: stime increased > 5000us: And tracked it down to the case where the task simply doesn't get _any_ [us]time ticks. Update the code to assume all rtime is utime when we lack information, thus ensuring a task that elides the tick gets time accounted. Reported-by: Mike Galbraith Tested-by: Mike Galbraith Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Fredrik Markstrom Cc: Linus Torvalds Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Cc: Rik van Riel Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wanpeng Li Cc: stable@vger.kernel.org # 4.3+ Fixes: 9d7fb0427648 ("sched/cputime: Guarantee stime + utime == rtime") Signed-off-by: Ingo Molnar diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 9858266..2ee83b2 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -614,19 +614,25 @@ static void cputime_adjust(struct task_cputime *curr, stime = curr->stime; utime = curr->utime; - if (utime == 0) { - stime = rtime; + /* + * If either stime or both stime and utime are 0, assume all runtime is + * userspace. Once a task gets some ticks, the monotonicy code at + * 'update' will ensure things converge to the observed ratio. + */ + if (stime == 0) { + utime = rtime; goto update; } - if (stime == 0) { - utime = rtime; + if (utime == 0) { + stime = rtime; goto update; } stime = scale_stime((__force u64)stime, (__force u64)rtime, (__force u64)(stime + utime)); +update: /* * Make sure stime doesn't go backwards; this preserves monotonicity * for utime because rtime is monotonic. @@ -649,7 +655,6 @@ static void cputime_adjust(struct task_cputime *curr, stime = rtime - utime; } -update: prev->stime = stime; prev->utime = utime; out: -- cgit v0.10.2 From 03cbc732639ddcad15218c4b2046d255851ff1e3 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 17 Aug 2016 10:05:46 +0800 Subject: sched/cputime: Resync steal time when guest & host lose sync Commit: 57430218317e ("sched/cputime: Count actually elapsed irq & softirq time") ... fixed a bug but also triggered a regression: On an i5 laptop, 4 pCPUs, 4vCPUs for one full dynticks guest, there are four CPU hog processes(for loop) running in the guest, I hot-unplug the pCPUs on host one by one until there is only one left, then observe CPU utilization via 'top' in the guest, it shows: 100% st for cpu0(housekeeping) 75% st for other CPUs (nohz full mode) However, w/o this commit it shows the correct 75% for all four CPUs. When a guest is interrupted for a longer amount of time, missed clock ticks are not redelivered later. Because of that, we should not limit the amount of steal time accounted to the amount of time that the calling functions think have passed. However, the interval returned by account_other_time() is NOT rounded down to the nearest jiffy, while the base interval in get_vtime_delta() it is subtracted from is, so the max cputime limit is required to avoid underflow. This patch fixes the regression by limiting the account_other_time() from get_vtime_delta() to avoid underflow, and lets the other three call sites (in account_other_time() and steal_account_process_time()) account however much steal time the host told us elapsed. Suggested-by: Rik van Riel Suggested-by: Paolo Bonzini Signed-off-by: Wanpeng Li Reviewed-by: Rik van Riel Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Radim Krcmar Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Link: http://lkml.kernel.org/r/1471399546-4069-1-git-send-email-wanpeng.li@hotmail.com [ Improved the changelog. ] Signed-off-by: Ingo Molnar diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 2ee83b2..a846cf8 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -263,6 +263,11 @@ void account_idle_time(cputime_t cputime) cpustat[CPUTIME_IDLE] += (__force u64) cputime; } +/* + * When a guest is interrupted for a longer amount of time, missed clock + * ticks are not redelivered later. Due to that, this function may on + * occasion account more time than the calling functions think elapsed. + */ static __always_inline cputime_t steal_account_process_time(cputime_t maxtime) { #ifdef CONFIG_PARAVIRT @@ -371,7 +376,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, * idle, or potentially user or system time. Due to rounding, * other time can exceed ticks occasionally. */ - other = account_other_time(cputime); + other = account_other_time(ULONG_MAX); if (other >= cputime) return; cputime -= other; @@ -486,7 +491,7 @@ void account_process_tick(struct task_struct *p, int user_tick) } cputime = cputime_one_jiffy; - steal = steal_account_process_time(cputime); + steal = steal_account_process_time(ULONG_MAX); if (steal >= cputime) return; @@ -516,7 +521,7 @@ void account_idle_ticks(unsigned long ticks) } cputime = jiffies_to_cputime(ticks); - steal = steal_account_process_time(cputime); + steal = steal_account_process_time(ULONG_MAX); if (steal >= cputime) return; @@ -699,6 +704,13 @@ static cputime_t get_vtime_delta(struct task_struct *tsk) unsigned long now = READ_ONCE(jiffies); cputime_t delta, other; + /* + * Unlike tick based timing, vtime based timing never has lost + * ticks, and no need for steal time accounting to make up for + * lost ticks. Vtime accounts a rounded version of actual + * elapsed time. Limit account_other_time to prevent rounding + * errors from causing elapsed vtime to go negative. + */ delta = jiffies_to_cputime(now - tsk->vtime_snap); other = account_other_time(delta); WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE); -- cgit v0.10.2 From d048c098218e91ed0e10dfa1f0f80e2567fe4ef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Mon, 8 Aug 2016 20:16:22 +0200 Subject: KVM: nVMX: fix msr bitmaps to prevent L2 from accessing L0 x2APIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit msr bitmap can be used to avoid a VM exit (interception) on guest MSR accesses. In some configurations of VMX controls, the guest can even directly access host's x2APIC MSRs. See SDM 29.5 VIRTUALIZING MSR-BASED APIC ACCESSES. L2 could read all L0's x2APIC MSRs and write TPR, EOI, and SELF_IPI. To do so, L1 would first trick KVM to disable all possible interceptions by enabling APICv features and then would turn those features off; nested_vmx_merge_msr_bitmap() only disabled interceptions, so VMX would not intercept previously enabled MSRs even though they were not safe with the new configuration. Correctly re-enabling interceptions is not enough as a second bug would still allow L1+L2 to access host's MSRs: msr bitmap was shared for all VMCSs, so L1 could trigger a race to get the desired combination of msr bitmap and VMX controls. This fix allocates a msr bitmap for every L1 VCPU, allows only safe x2APIC MSRs from L1's msr bitmap, and disables msr bitmaps if they would have to intercept everything anyway. Fixes: 3af18d9c5fe9 ("KVM: nVMX: Prepare for using hardware MSR bitmap") Reported-by: Jim Mattson Suggested-by: Wincy Van Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a45d858..c66ac2c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -435,6 +435,8 @@ struct nested_vmx { bool pi_pending; u16 posted_intr_nv; + unsigned long *msr_bitmap; + struct hrtimer preemption_timer; bool preemption_timer_expired; @@ -924,7 +926,6 @@ static unsigned long *vmx_msr_bitmap_legacy; static unsigned long *vmx_msr_bitmap_longmode; static unsigned long *vmx_msr_bitmap_legacy_x2apic; static unsigned long *vmx_msr_bitmap_longmode_x2apic; -static unsigned long *vmx_msr_bitmap_nested; static unsigned long *vmx_vmread_bitmap; static unsigned long *vmx_vmwrite_bitmap; @@ -2508,7 +2509,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) unsigned long *msr_bitmap; if (is_guest_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_nested; + msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; else if (cpu_has_secondary_exec_ctrls() && (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { @@ -6363,13 +6364,6 @@ static __init int hardware_setup(void) if (!vmx_msr_bitmap_longmode_x2apic) goto out4; - if (nested) { - vmx_msr_bitmap_nested = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_nested) - goto out5; - } - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmread_bitmap) goto out6; @@ -6392,8 +6386,6 @@ static __init int hardware_setup(void) memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); - if (nested) - memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE); if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; @@ -6529,9 +6521,6 @@ out8: out7: free_page((unsigned long)vmx_vmread_bitmap); out6: - if (nested) - free_page((unsigned long)vmx_msr_bitmap_nested); -out5: free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); out4: free_page((unsigned long)vmx_msr_bitmap_longmode); @@ -6557,8 +6546,6 @@ static __exit void hardware_unsetup(void) free_page((unsigned long)vmx_io_bitmap_a); free_page((unsigned long)vmx_vmwrite_bitmap); free_page((unsigned long)vmx_vmread_bitmap); - if (nested) - free_page((unsigned long)vmx_msr_bitmap_nested); free_kvm_area(); } @@ -6995,16 +6982,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } + if (cpu_has_vmx_msr_bitmap()) { + vmx->nested.msr_bitmap = + (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx->nested.msr_bitmap) + goto out_msr_bitmap; + } + vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); if (!vmx->nested.cached_vmcs12) - return -ENOMEM; + goto out_cached_vmcs12; if (enable_shadow_vmcs) { shadow_vmcs = alloc_vmcs(); - if (!shadow_vmcs) { - kfree(vmx->nested.cached_vmcs12); - return -ENOMEM; - } + if (!shadow_vmcs) + goto out_shadow_vmcs; /* mark vmcs as shadow */ shadow_vmcs->revision_id |= (1u << 31); /* init shadow vmcs */ @@ -7024,6 +7016,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu) skip_emulated_instruction(vcpu); nested_vmx_succeed(vcpu); return 1; + +out_shadow_vmcs: + kfree(vmx->nested.cached_vmcs12); + +out_cached_vmcs12: + free_page((unsigned long)vmx->nested.msr_bitmap); + +out_msr_bitmap: + return -ENOMEM; } /* @@ -7098,6 +7099,10 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.vmxon = false; free_vpid(vmx->nested.vpid02); nested_release_vmcs12(vmx); + if (vmx->nested.msr_bitmap) { + free_page((unsigned long)vmx->nested.msr_bitmap); + vmx->nested.msr_bitmap = NULL; + } if (enable_shadow_vmcs) free_vmcs(vmx->nested.current_shadow_vmcs); kfree(vmx->nested.cached_vmcs12); @@ -9472,8 +9477,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, { int msr; struct page *page; - unsigned long *msr_bitmap; + unsigned long *msr_bitmap_l1; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; + /* This shortcut is ok because we support only x2APIC MSRs so far. */ if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) return false; @@ -9482,63 +9489,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, WARN_ON(1); return false; } - msr_bitmap = (unsigned long *)kmap(page); - if (!msr_bitmap) { + msr_bitmap_l1 = (unsigned long *)kmap(page); + if (!msr_bitmap_l1) { nested_release_page_clean(page); WARN_ON(1); return false; } + memset(msr_bitmap_l0, 0xff, PAGE_SIZE); + if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { if (nested_cpu_has_apic_reg_virt(vmcs12)) for (msr = 0x800; msr <= 0x8ff; msr++) nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, msr, MSR_TYPE_R); - /* TPR is allowed */ - nested_vmx_disable_intercept_for_msr(msr_bitmap, - vmx_msr_bitmap_nested, + + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_TASKPRI >> 4), MSR_TYPE_R | MSR_TYPE_W); + if (nested_cpu_has_vid(vmcs12)) { - /* EOI and self-IPI are allowed */ nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_EOI >> 4), MSR_TYPE_W); nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_SELF_IPI >> 4), MSR_TYPE_W); } - } else { - /* - * Enable reading intercept of all the x2apic - * MSRs. We should not rely on vmcs12 to do any - * optimizations here, it may have been modified - * by L1. - */ - for (msr = 0x800; msr <= 0x8ff; msr++) - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - msr, - MSR_TYPE_R); - - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_TASKPRI >> 4), - MSR_TYPE_W); - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_EOI >> 4), - MSR_TYPE_W); - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_SELF_IPI >> 4), - MSR_TYPE_W); } kunmap(page); nested_release_page_clean(page); @@ -9957,10 +9938,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) } if (cpu_has_vmx_msr_bitmap() && - exec_control & CPU_BASED_USE_MSR_BITMAPS) { - nested_vmx_merge_msr_bitmap(vcpu, vmcs12); - /* MSR_BITMAP will be set by following vmx_set_efer. */ - } else + exec_control & CPU_BASED_USE_MSR_BITMAPS && + nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) + ; /* MSR_BITMAP will be set by following vmx_set_efer. */ + else exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; /* -- cgit v0.10.2 From dccbfcf52cebb8963246eba5b177b77f26b34da0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Mon, 8 Aug 2016 20:16:23 +0200 Subject: KVM: nVMX: postpone VMCS changes on MSR_IA32_APICBASE write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If vmcs12 does not intercept APIC_BASE writes, then KVM will handle the write with vmcs02 as the current VMCS. This will incorrectly apply modifications intended for vmcs01 to vmcs02 and L2 can use it to gain access to L0's x2APIC registers by disabling virtualized x2APIC while using msr bitmap that assumes enabled. Postpone execution of vmx_set_virtual_x2apic_mode until vmcs01 is the current VMCS. An alternative solution would temporarily make vmcs01 the current VMCS, but it requires more care. Fixes: 8d14695f9542 ("x86, apicv: add virtual x2apic support") Reported-by: Jim Mattson Reviewed-by: Wanpeng Li Signed-off-by: Radim Krčmář diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c66ac2c..ae111a0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -422,6 +422,7 @@ struct nested_vmx { struct list_head vmcs02_pool; int vmcs02_num; u64 vmcs01_tsc_offset; + bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; /* @@ -8424,6 +8425,12 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) { u32 sec_exec_control; + /* Postpone execution until vmcs01 is the current VMCS. */ + if (is_guest_mode(vcpu)) { + to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true; + return; + } + /* * There is not point to enable virtualize x2apic without enable * apicv @@ -10749,6 +10756,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, PIN_BASED_VMX_PREEMPTION_TIMER); + if (vmx->nested.change_vmcs01_virtual_x2apic_mode) { + vmx->nested.change_vmcs01_virtual_x2apic_mode = false; + vmx_set_virtual_x2apic_mode(vcpu, + vcpu->arch.apic_base & X2APIC_ENABLE); + } + /* This is needed for same reason as it was needed in prepare_vmcs02 */ vmx->host_rsp = 0; -- cgit v0.10.2 From c95ba92afb238ac565c68968fc72e38ca8d1b6e8 Mon Sep 17 00:00:00 2001 From: Peter Feiner Date: Wed, 17 Aug 2016 09:36:47 -0700 Subject: kvm: nVMX: fix nested tsc scaling When the host supported TSC scaling, L2 would use a TSC multiplier of 0, which causes a VM entry failure. Now L2's TSC uses the same multiplier as L1. Signed-off-by: Peter Feiner Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ae111a0..5cede40 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2200,6 +2200,12 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) new.control) != old.control); } +static void decache_tsc_multiplier(struct vcpu_vmx *vmx) +{ + vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio; + vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); +} + /* * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. @@ -2258,10 +2264,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Setup TSC multiplier */ if (kvm_has_tsc_control && - vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) { - vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio; - vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); - } + vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) + decache_tsc_multiplier(vmx); vmx_vcpu_pi_load(vcpu, cpu); vmx->host_pkru = read_pkru(); @@ -9999,6 +10003,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); else vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); + if (kvm_has_tsc_control) + decache_tsc_multiplier(vmx); if (enable_vpid) { /* @@ -10755,6 +10761,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, else vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, PIN_BASED_VMX_PREEMPTION_TIMER); + if (kvm_has_tsc_control) + decache_tsc_multiplier(vmx); if (vmx->nested.change_vmcs01_virtual_x2apic_mode) { vmx->nested.change_vmcs01_virtual_x2apic_mode = false; -- cgit v0.10.2 From a93a4d62324123dcda383bdb4ab89151bbc0e499 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 5 Dec 2014 12:34:54 +0000 Subject: arm64: Fix shift warning in arch/arm64/mm/dump.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with 48-bit VAs and 16K page configuration, it's possible to get the following warning when building the arm64 page table dumping code: arch/arm64/mm/dump.c: In function ‘walk_pud’: arch/arm64/mm/dump.c:274:102: warning: right shift count >= width of type [-Wshift-count-overflow] This is because pud_offset(pgd, 0) performs a shift to the right by 36 while the value 0 has the type 'int' by default, therefore 32-bit. This patch modifies all the p*_offset() uses in arch/arm64/mm/dump.c to use 0UL for the address argument. Acked-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index f94b80e..9c3e75d 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -242,7 +242,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) { - pte_t *pte = pte_offset_kernel(pmd, 0); + pte_t *pte = pte_offset_kernel(pmd, 0UL); unsigned long addr; unsigned i; @@ -254,7 +254,7 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) { - pmd_t *pmd = pmd_offset(pud, 0); + pmd_t *pmd = pmd_offset(pud, 0UL); unsigned long addr; unsigned i; @@ -271,7 +271,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) { - pud_t *pud = pud_offset(pgd, 0); + pud_t *pud = pud_offset(pgd, 0UL); unsigned long addr; unsigned i; -- cgit v0.10.2 From 5d0bdf2867825a92c0a563957a2fb059149ab0d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Aug 2016 07:11:05 -0700 Subject: PCI: Call pci_intx() when using legacy interrupts in pci_alloc_irq_vectors() ahci currently insists on an explicit call to pci_intx() before falling back from MSI or MSI-X to legacy IRQs. As pci_intx() is a no-op if the command register already contains the right value it seems safe and useful to add this call to pci_alloc_irq_vectors() so that ahci can just use pci_alloc_irq_vectors(). Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 9233e7f..593698e 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1200,8 +1200,11 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, } /* use legacy irq if allowed */ - if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1) + if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1) { + pci_intx(dev, 1); return 1; + } + return vecs; } EXPORT_SYMBOL(pci_alloc_irq_vectors); -- cgit v0.10.2 From 12be15dd5ac928b60323b1ed8f6facd7335bb2cc Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 13 Aug 2016 23:13:01 +0800 Subject: netfilter: nfnetlink_acct: fix race between nfacct del and xt_nfacct destroy Suppose that we input the following commands at first: # nfacct add test # iptables -A INPUT -m nfacct --nfacct-name test And now "test" acct's refcnt is 2, but later when we try to delete the "test" nfacct and the related iptables rule at the same time, race maybe happen: CPU0 CPU1 nfnl_acct_try_del nfnl_acct_put atomic_dec_and_test //ref=1,testfail - - atomic_dec_and_test //ref=0,testok - kfree_rcu atomic_inc //ref=1 - So after the rcu grace period, nf_acct will be freed but it is still linked in the nfnl_acct_list, and we can access it later, then oops will happen. Convert atomic_dec_and_test and atomic_inc combinaiton to one atomic operation atomic_cmpxchg here to fix this problem. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 796605b..70eb2f6a 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -326,14 +326,14 @@ static int nfnl_acct_try_del(struct nf_acct *cur) { int ret = 0; - /* we want to avoid races with nfnl_acct_find_get. */ - if (atomic_dec_and_test(&cur->refcnt)) { + /* We want to avoid races with nfnl_acct_put. So only when the current + * refcnt is 1, we decrease it to 0. + */ + if (atomic_cmpxchg(&cur->refcnt, 1, 0) == 1) { /* We are protected by nfnl mutex. */ list_del_rcu(&cur->head); kfree_rcu(cur, rcu_head); } else { - /* still in use, restore reference counter. */ - atomic_inc(&cur->refcnt); ret = -EBUSY; } return ret; -- cgit v0.10.2 From b75911b66ad508a3c3f006ce37d9f9ebee34da43 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Thu, 18 Aug 2016 20:39:05 +0800 Subject: netfilter: cttimeout: fix use after free error when delete netns In general, when we want to delete a netns, cttimeout_net_exit will be called before ipt_unregister_table, i.e. before ctnl_timeout_put. But after call kfree_rcu in cttimeout_net_exit, we will still decrease the timeout object's refcnt in ctnl_timeout_put, this is incorrect, and will cause a use after free error. It is easy to reproduce this problem: # while : ; do ip netns add xxx ip netns exec xxx nfct add timeout testx inet icmp timeout 200 ip netns exec xxx iptables -t raw -p icmp -I OUTPUT -j CT --timeout testx ip netns del xxx done ======================================================================= BUG kmalloc-96 (Tainted: G B E ): Poison overwritten ----------------------------------------------------------------------- INFO: 0xffff88002b5161e8-0xffff88002b5161e8. First byte 0x6a instead of 0x6b INFO: Allocated in cttimeout_new_timeout+0xd4/0x240 [nfnetlink_cttimeout] age=104 cpu=0 pid=3330 ___slab_alloc+0x4da/0x540 __slab_alloc+0x20/0x40 __kmalloc+0x1c8/0x240 cttimeout_new_timeout+0xd4/0x240 [nfnetlink_cttimeout] nfnetlink_rcv_msg+0x21a/0x230 [nfnetlink] [ ... ] So only when the refcnt decreased to 0, we call kfree_rcu to free the timeout object. And like nfnetlink_acct do, use atomic_cmpxchg to avoid race between ctnl_timeout_try_del and ctnl_timeout_put. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 4cdcd96..68216cd 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -330,16 +330,16 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) { int ret = 0; - /* we want to avoid races with nf_ct_timeout_find_get. */ - if (atomic_dec_and_test(&timeout->refcnt)) { + /* We want to avoid races with ctnl_timeout_put. So only when the + * current refcnt is 1, we decrease it to 0. + */ + if (atomic_cmpxchg(&timeout->refcnt, 1, 0) == 1) { /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); nf_ct_l4proto_put(timeout->l4proto); ctnl_untimeout(net, timeout); kfree_rcu(timeout, rcu_head); } else { - /* still in use, restore reference counter. */ - atomic_inc(&timeout->refcnt); ret = -EBUSY; } return ret; @@ -543,7 +543,9 @@ err: static void ctnl_timeout_put(struct ctnl_timeout *timeout) { - atomic_dec(&timeout->refcnt); + if (atomic_dec_and_test(&timeout->refcnt)) + kfree_rcu(timeout, rcu_head); + module_put(THIS_MODULE); } #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ @@ -591,7 +593,9 @@ static void __net_exit cttimeout_net_exit(struct net *net) list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { list_del_rcu(&cur->head); nf_ct_l4proto_put(cur->l4proto); - kfree_rcu(cur, rcu_head); + + if (atomic_dec_and_test(&cur->refcnt)) + kfree_rcu(cur, rcu_head); } } -- cgit v0.10.2 From f17b3ea3d2df7c9bf3ce1dbd65b5fd7061f8e787 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Aug 2016 11:50:21 +0200 Subject: Revert "drm/fb-helper: Reduce READ_ONCE(master) to lockless_dereference" This reverts commit: fa7d81bb3c269 ("drm/fb-helper: Reduce READ_ONCE(master) to lockless_dereference") As Peter explained: [...] lockless_dereference() is _stronger_ than READ_ONCE(), not weaker. [...] Also, clue is in the name: 'dereference', you don't actually dereference the pointer here, only load it. My next patch breaks the compile without this revert, because it assumes you want to deference and thus also need the struct type visible (which it isn't here), so revert it. Tested-by: Paul E. McKenney Signed-off-by: Johannes Berg Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Vetter Cc: Andrew Morton Cc: Chris Wilson Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1470909022-687-1-git-send-email-johannes@sipsolutions.net Signed-off-by: Ingo Molnar diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index ce54e98..0a06f91 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -464,7 +464,7 @@ static bool drm_fb_helper_is_bound(struct drm_fb_helper *fb_helper) /* Sometimes user space wants everything disabled, so don't steal the * display if there's a master. */ - if (lockless_dereference(dev->master)) + if (READ_ONCE(dev->master)) return false; drm_for_each_crtc(crtc, dev) { -- cgit v0.10.2 From 112dc0c8069e5554e0ad29c58228f1e6ca49e13d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Aug 2016 11:50:22 +0200 Subject: locking/barriers: Suppress sparse warnings in lockless_dereference() After Peter's commit: 331b6d8c7afc ("locking/barriers: Validate lockless_dereference() is used on a pointer type") ... we get a lot of sparse warnings (one for every rcu_dereference, and more) since the expression here is assigning to the wrong address space. Instead of validating that 'p' is a pointer this way, instead make it fail compilation when it's not by using sizeof(*(p)). This will not cause any sparse warnings (tested, likely since the address space is irrelevant for sizeof), and will fail compilation when 'p' isn't a pointer type. Tested-by: Paul E. McKenney Signed-off-by: Johannes Berg Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 331b6d8c7afc ("locking/barriers: Validate lockless_dereference() is used on a pointer type") Link: http://lkml.kernel.org/r/1470909022-687-2-git-send-email-johannes@sipsolutions.net Signed-off-by: Ingo Molnar diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 1bb9548..436aa4e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -527,13 +527,13 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * object's lifetime is managed by something other than RCU. That * "something other" might be reference counting or simple immortality. * - * The seemingly unused void * variable is to validate @p is indeed a pointer - * type. All pointer types silently cast to void *. + * The seemingly unused size_t variable is to validate @p is indeed a pointer + * type by making sure it can be dereferenced. */ #define lockless_dereference(p) \ ({ \ typeof(p) _________p1 = READ_ONCE(p); \ - __maybe_unused const void * const _________p2 = _________p1; \ + size_t __maybe_unused __size_of_ptr = sizeof(*(p)); \ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ (_________p1); \ }) -- cgit v0.10.2 From 479e2a86dc6aeaec6013165e1bd3525db6914f3a Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 18 Aug 2016 14:00:59 +0300 Subject: ASoC: omap-mcpdm: Drop pdmclk clock handling This reverts commit 65aca64d05b5eaa5ce15e18b458a8d338ddbd478. The patches for twl6040 MFD and clk missed the merge window and causing the McPDM driver to never probe since it is put back to the deferred list because the missing drivers. Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown diff --git a/Documentation/devicetree/bindings/sound/omap-mcpdm.txt b/Documentation/devicetree/bindings/sound/omap-mcpdm.txt index 6f6c2f8..0741dff 100644 --- a/Documentation/devicetree/bindings/sound/omap-mcpdm.txt +++ b/Documentation/devicetree/bindings/sound/omap-mcpdm.txt @@ -8,8 +8,6 @@ Required properties: - interrupts: Interrupt number for McPDM - interrupt-parent: The parent interrupt controller - ti,hwmods: Name of the hwmod associated to the McPDM -- clocks: phandle for the pdmclk provider, likely <&twl6040> -- clock-names: Must be "pdmclk" Example: @@ -21,11 +19,3 @@ mcpdm: mcpdm@40132000 { interrupt-parent = <&gic>; ti,hwmods = "mcpdm"; }; - -In board DTS file the pdmclk needs to be added: - -&mcpdm { - clocks = <&twl6040>; - clock-names = "pdmclk"; - status = "okay"; -}; diff --git a/sound/soc/omap/omap-mcpdm.c b/sound/soc/omap/omap-mcpdm.c index e7cdc51..74d6e6f 100644 --- a/sound/soc/omap/omap-mcpdm.c +++ b/sound/soc/omap/omap-mcpdm.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -55,7 +54,6 @@ struct omap_mcpdm { unsigned long phys_base; void __iomem *io_base; int irq; - struct clk *pdmclk; struct mutex mutex; @@ -390,7 +388,6 @@ static int omap_mcpdm_probe(struct snd_soc_dai *dai) struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai); int ret; - clk_prepare_enable(mcpdm->pdmclk); pm_runtime_enable(mcpdm->dev); /* Disable lines while request is ongoing */ @@ -425,7 +422,6 @@ static int omap_mcpdm_remove(struct snd_soc_dai *dai) pm_runtime_disable(mcpdm->dev); - clk_disable_unprepare(mcpdm->pdmclk); return 0; } @@ -445,8 +441,6 @@ static int omap_mcpdm_suspend(struct snd_soc_dai *dai) mcpdm->pm_active_count++; } - clk_disable_unprepare(mcpdm->pdmclk); - return 0; } @@ -454,8 +448,6 @@ static int omap_mcpdm_resume(struct snd_soc_dai *dai) { struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai); - clk_prepare_enable(mcpdm->pdmclk); - if (mcpdm->pm_active_count) { while (mcpdm->pm_active_count--) pm_runtime_get_sync(mcpdm->dev); @@ -549,15 +541,6 @@ static int asoc_mcpdm_probe(struct platform_device *pdev) mcpdm->dev = &pdev->dev; - mcpdm->pdmclk = devm_clk_get(&pdev->dev, "pdmclk"); - if (IS_ERR(mcpdm->pdmclk)) { - if (PTR_ERR(mcpdm->pdmclk) == -EPROBE_DEFER) - return -EPROBE_DEFER; - dev_warn(&pdev->dev, "Error getting pdmclk (%ld)!\n", - PTR_ERR(mcpdm->pdmclk)); - mcpdm->pdmclk = NULL; - } - ret = devm_snd_soc_register_component(&pdev->dev, &omap_mcpdm_component, &omap_mcpdm_dai, 1); -- cgit v0.10.2 From ae5b80d2b68eac945b124227dea34462118a6f01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 18 Aug 2016 11:51:14 +0200 Subject: drm/radeon: only apply the SS fractional workaround to RS[78]80 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like some RV6xx have problems with that. bug: https://bugs.freedesktop.org/show_bug.cgi?id=97099 Reviewed-by: Alex Deucher Signed-off-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index a97abc8..1dcf390 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -627,7 +627,9 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, if (radeon_crtc->ss.refdiv) { radeon_crtc->pll_flags |= RADEON_PLL_USE_REF_DIV; radeon_crtc->pll_reference_div = radeon_crtc->ss.refdiv; - if (rdev->family >= CHIP_RV770) + if (ASIC_IS_AVIVO(rdev) && + rdev->family != CHIP_RS780 && + rdev->family != CHIP_RS880) radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV; } } -- cgit v0.10.2 From 51c70261b2575962cb9406cd92246b1cee6a3c71 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 18 Aug 2016 17:21:37 +0200 Subject: Revert "android: binder: fix dangling pointer comparison" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 7b142d8fd0bd4c9bf06ccb72ac4daedb503f0124. It doesn't seem to be correct, no one seems to have tested it, and the email address of the submitter now bounces :( So revert it. Cc: Jann Horn Cc: Chen Feng Cc: stable Cc: Arve Hjønnevåg Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 09fdb42..16288e7 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2962,7 +2962,6 @@ static int binder_open(struct inode *nodp, struct file *filp) return -ENOMEM; get_task_struct(current); proc->tsk = current; - atomic_inc(¤t->mm->mm_count); proc->vma_vm_mm = current->mm; INIT_LIST_HEAD(&proc->todo); init_waitqueue_head(&proc->wait); @@ -3168,7 +3167,6 @@ static void binder_deferred_release(struct binder_proc *proc) vfree(proc->buffer); } - mmdrop(proc->vma_vm_mm); put_task_struct(proc->tsk); binder_debug(BINDER_DEBUG_OPEN_CLOSE, -- cgit v0.10.2 From 87a713c8ffca33d8e497a8b6c02034332bd80394 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Aug 2016 23:54:13 +0200 Subject: 8250/fintek: rename IRQ_MODE macro A bugfix for the fintek driver required defining some macros, but one of them clashes with a system header on ARM: drivers/tty/serial/8250/8250_fintek.c:34:0: error: "IRQ_MODE" redefined [-Werror] #define IRQ_MODE 0x70 In file included from /git/arm-soc/arch/arm/include/asm/ptrace.h:13:0, from /git/arm-soc/arch/arm/include/asm/irqflags.h:6, from /git/arm-soc/include/linux/irqflags.h:15, from /git/arm-soc/arch/arm/include/asm/bitops.h:27, from /git/arm-soc/include/linux/bitops.h:36, from /git/arm-soc/include/linux/kernel.h:10, from /git/arm-soc/include/linux/list.h:8, from /git/arm-soc/include/linux/module.h:9, from /git/arm-soc/drivers/tty/serial/8250/8250_fintek.c:11: arch/arm/include/uapi/asm/ptrace.h:55:0: note: this is the location of the previous definition This renames the newly introduced 'IRQ_MODE' macro to FINTEK_IRQ_MODE. Signed-off-by: Arnd Bergmann Fixes: 4da22f1418cb ("serial: 8250_fintek: fix the mismatched IRQ mode") Link: https://patchwork.kernel.org/patch/9200119/ Acked-by: Ji-Ze Hong (Peter Hong) Acked-by: Ricardo Ribalda Delgado Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/tty/serial/8250/8250_fintek.c b/drivers/tty/serial/8250/8250_fintek.c index 737b4b3..0facc78 100644 --- a/drivers/tty/serial/8250/8250_fintek.c +++ b/drivers/tty/serial/8250/8250_fintek.c @@ -31,7 +31,7 @@ #define IO_ADDR2 0x60 #define LDN 0x7 -#define IRQ_MODE 0x70 +#define FINTEK_IRQ_MODE 0x70 #define IRQ_SHARE BIT(4) #define IRQ_MODE_MASK (BIT(6) | BIT(5)) #define IRQ_LEVEL_LOW 0 @@ -195,7 +195,7 @@ static int fintek_8250_set_irq_mode(struct fintek_8250 *pdata, bool level_mode) outb(LDN, pdata->base_port + ADDR_PORT); outb(pdata->index, pdata->base_port + DATA_PORT); - outb(IRQ_MODE, pdata->base_port + ADDR_PORT); + outb(FINTEK_IRQ_MODE, pdata->base_port + ADDR_PORT); tmp = inb(pdata->base_port + DATA_PORT); tmp &= ~IRQ_MODE_MASK; -- cgit v0.10.2 From c4e94174983a86c935be1537a73e496b778b0287 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Tue, 16 Aug 2016 19:19:11 +0800 Subject: usb: chipidea: udc: don't touch DP when controller is in host mode When the controller is configured to be dual role and it's in host mode, if bind udc and gadgt driver, those gadget operations will do gadget disconnect and finally pull down DP line, which will break host function. Cc: # 4.1+ Signed-off-by: Li Jun Signed-off-by: Peter Chen diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 065f5d9..dfec5a1 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1596,8 +1596,11 @@ static int ci_udc_pullup(struct usb_gadget *_gadget, int is_on) { struct ci_hdrc *ci = container_of(_gadget, struct ci_hdrc, gadget); - /* Data+ pullup controlled by OTG state machine in OTG fsm mode */ - if (ci_otg_is_fsm_mode(ci)) + /* + * Data+ pullup controlled by OTG state machine in OTG fsm mode; + * and don't touch Data+ in host mode for dual role config. + */ + if (ci_otg_is_fsm_mode(ci) || ci->role == CI_ROLE_HOST) return 0; pm_runtime_get_sync(&ci->gadget.dev); -- cgit v0.10.2 From b873b798af6386f3c7ca1636d4989e9b8f9d1794 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 4 Aug 2016 11:38:25 -0700 Subject: Revert "f2fs: use percpu_rw_semaphore" LKP reported -36.3% regression of fsmark.files_per_sec due to this patch. I've confirmed that fxmark [1] has also slight regression for DWAL. [1] https://github.com/sslab-gatech/fxmark This reverts commit ec795418c41850056feb956534edf059dc1155d4. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 675fa79..14f5fe2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -538,7 +538,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - struct percpu_rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ + struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ @@ -787,7 +787,7 @@ struct f2fs_sb_info { struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ struct inode *meta_inode; /* cache meta blocks */ struct mutex cp_mutex; /* checkpoint procedure lock */ - struct percpu_rw_semaphore cp_rwsem; /* blocking FS operations */ + struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ wait_queue_head_t cp_wait; unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ @@ -1074,22 +1074,22 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) { - percpu_down_read(&sbi->cp_rwsem); + down_read(&sbi->cp_rwsem); } static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) { - percpu_up_read(&sbi->cp_rwsem); + up_read(&sbi->cp_rwsem); } static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) { - percpu_down_write(&sbi->cp_rwsem); + down_write(&sbi->cp_rwsem); } static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) { - percpu_up_write(&sbi->cp_rwsem); + up_write(&sbi->cp_rwsem); } static inline int __get_cp_reason(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b2fa4b6..f75d197 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -206,14 +206,14 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool need = false; - percpu_down_read(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { if (!get_nat_flag(e, IS_CHECKPOINTED) && !get_nat_flag(e, HAS_FSYNCED_INODE)) need = true; } - percpu_up_read(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return need; } @@ -223,11 +223,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool is_cp = true; - percpu_down_read(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; - percpu_up_read(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return is_cp; } @@ -237,13 +237,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - percpu_down_read(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - percpu_up_read(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return need_update; } @@ -284,7 +284,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; - percpu_down_write(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = grab_nat_entry(nm_i, ni->nid); @@ -334,7 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - percpu_up_write(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -342,7 +342,8 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) struct f2fs_nm_info *nm_i = NM_I(sbi); int nr = nr_shrink; - percpu_down_write(&nm_i->nat_tree_lock); + if (!down_write_trylock(&nm_i->nat_tree_lock)) + return 0; while (nr_shrink && !list_empty(&nm_i->nat_entries)) { struct nat_entry *ne; @@ -351,7 +352,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) __del_from_nat_cache(nm_i, ne); nr_shrink--; } - percpu_up_write(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); return nr - nr_shrink; } @@ -373,13 +374,13 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) ni->nid = nid; /* Check nat cache */ - percpu_down_read(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); - percpu_up_read(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return; } @@ -403,11 +404,11 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) node_info_from_raw_nat(ni, &ne); f2fs_put_page(page, 1); cache: - percpu_up_read(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); /* cache nat entry */ - percpu_down_write(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); cache_nat_entry(sbi, nid, &ne); - percpu_up_write(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } /* @@ -1788,7 +1789,7 @@ void build_free_nids(struct f2fs_sb_info *sbi) ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); - percpu_down_read(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); while (1) { struct page *page = get_current_nat_page(sbi, nid); @@ -1820,7 +1821,7 @@ void build_free_nids(struct f2fs_sb_info *sbi) remove_free_nid(nm_i, nid); } up_read(&curseg->journal_rwsem); - percpu_up_read(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); @@ -2209,7 +2210,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!nm_i->dirty_nat_cnt) return; - percpu_down_write(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); /* * if there are no enough space in journal to store dirty nat @@ -2232,7 +2233,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) list_for_each_entry_safe(set, tmp, &sets, set_list) __flush_nat_entry_set(sbi, set); - percpu_up_write(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); } @@ -2268,8 +2269,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->free_nid_list_lock); - if (percpu_init_rwsem(&nm_i->nat_tree_lock)) - return -ENOMEM; + init_rwsem(&nm_i->nat_tree_lock); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); @@ -2326,7 +2326,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->free_nid_list_lock); /* destroy nat cache */ - percpu_down_write(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; @@ -2351,9 +2351,8 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) kmem_cache_free(nat_entry_set_slab, setvec[idx]); } } - percpu_up_write(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); - percpu_free_rwsem(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); sbi->nm_info = NULL; kfree(nm_i); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1b86d3f..7f863a6 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -706,8 +706,6 @@ static void destroy_percpu_info(struct f2fs_sb_info *sbi) percpu_counter_destroy(&sbi->nr_pages[i]); percpu_counter_destroy(&sbi->alloc_valid_block_count); percpu_counter_destroy(&sbi->total_valid_inode_count); - - percpu_free_rwsem(&sbi->cp_rwsem); } static void f2fs_put_super(struct super_block *sb) @@ -1483,9 +1481,6 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) { int i, err; - if (percpu_init_rwsem(&sbi->cp_rwsem)) - return -ENOMEM; - for (i = 0; i < NR_COUNT_TYPE; i++) { err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL); if (err) @@ -1686,6 +1681,7 @@ try_onemore: sbi->write_io[i].bio = NULL; } + init_rwsem(&sbi->cp_rwsem); init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); -- cgit v0.10.2 From 3024c9a1fefb3ac0d1b0b078a2e3f2f69478daab Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 6 Aug 2016 21:09:41 +0800 Subject: Revert "f2fs: move i_size_write in f2fs_write_end" This reverts commit a2ee0a300344a6da76186129b078113354fe13d2. When testing with generic/032 of xfstest suit, failure message will be reported as below: generic/032 8s ... [failed, exit status 1] - output mismatch (see results/generic/032.out.bad) --- tests/generic/032.out 2015-01-11 16:52:27.643681072 +0800 +++ results/generic/032.out.bad 2016-08-06 13:44:43.861330500 +0800 @@ -1,5 +1,5 @@ QA output created by 032 -100 iterations -0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd -* -0100000 +1: [768..775]: unwritten +Unwritten extents found! ... (Run 'diff -u tests/generic/032.out results/generic/032.out.bad' to see the entire diff) Ran: generic/032 Failures: generic/032 Failed 1 of 1 tests In write_end(), we should update i_size of inode before unlock page, otherwise, we will lose newly updated data in following race condition. Thread A Thread B - write_end - unlock page - writepages - lock_page - writepage if page is out-of-range of file size, we will skip writting the page. - update i_size Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d64d2a5..ccb401e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1699,11 +1699,11 @@ static int f2fs_write_end(struct file *file, trace_f2fs_write_end(inode, pos, len, copied); set_page_dirty(page); - f2fs_put_page(page, 1); if (pos + copied > i_size_read(inode)) f2fs_i_size_write(inode, pos + copied); + f2fs_put_page(page, 1); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return copied; } -- cgit v0.10.2 From fe8494bfc8c2914fca821d4ae994aef039be5cf1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 4 Aug 2016 20:13:02 +0800 Subject: f2fs: allow copying file range only in between regular files Only if two input files are regular files, we allow copying data in range of them, otherwise, deny it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0e493f6..685e629 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2086,8 +2086,8 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, if (unlikely(f2fs_readonly(src->i_sb))) return -EROFS; - if (S_ISDIR(src->i_mode) || S_ISDIR(dst->i_mode)) - return -EISDIR; + if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode)) + return -EINVAL; if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst)) return -EOPNOTSUPP; -- cgit v0.10.2 From 20a3d61d46e1fb45efa6eb4637c0dcd3f00a14e9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 4 Aug 2016 20:13:03 +0800 Subject: f2fs: avoid potential deadlock in f2fs_move_file_range Thread A Thread B - inode_lock fileA - inode_lock fileB - inode_lock fileA - inode_lock fileB We may encounter above potential deadlock during moving file range in concurrent scenario. This patch fixes the issue by using inode_trylock instead. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 685e629..47abb96 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2093,8 +2093,12 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, return -EOPNOTSUPP; inode_lock(src); - if (src != dst) - inode_lock(dst); + if (src != dst) { + if (!inode_trylock(dst)) { + ret = -EBUSY; + goto out; + } + } ret = -EINVAL; if (pos_in + len > src->i_size || pos_in + len < pos_in) @@ -2152,6 +2156,7 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, out_unlock: if (src != dst) inode_unlock(dst); +out: inode_unlock(src); return ret; } -- cgit v0.10.2 From d9dc1702b297ec4a6bb9c0326a70641b322ba886 Mon Sep 17 00:00:00 2001 From: Eric Wheeler Date: Fri, 17 Jun 2016 15:01:54 -0700 Subject: bcache: register_bcache(): call blkdev_put() when cache_alloc() fails register_cache() is supposed to return an error string on error so that register_bcache() will will blkdev_put and cleanup other user counters, but it does not set 'char *err' when cache_alloc() fails (eg, due to memory pressure) and thus register_bcache() performs no cleanup. register_bcache() <----------\ <- no jump to err_close, no blkdev_put() | | +->register_cache() | <- fails to set char *err | | +->cache_alloc() ---/ <- returns error This patch sets `char *err` for this failure case so that register_cache() will cause register_bcache() to correctly jump to err_close and do cleanup. This was tested under OOM conditions that triggered the bug. Signed-off-by: Eric Wheeler Cc: Kent Overstreet Cc: stable@vger.kernel.org diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 95a4ca6..6ada14b 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1844,7 +1844,7 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, struct block_device *bdev, struct cache *ca) { char name[BDEVNAME_SIZE]; - const char *err = NULL; + const char *err = NULL; /* must be set for any error case */ int ret = 0; memcpy(&ca->sb, sb, sizeof(struct cache_sb)); @@ -1861,8 +1861,13 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, ca->discard = CACHE_DISCARD(&ca->sb); ret = cache_alloc(ca); - if (ret != 0) + if (ret != 0) { + if (ret == -ENOMEM) + err = "cache_alloc(): -ENOMEM"; + else + err = "cache_alloc(): unknown error"; goto err; + } if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) { err = "error calling kobject_add"; -- cgit v0.10.2 From acc9cf8c66c66b2cbbdb4a375537edee72be64df Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 17 Aug 2016 18:21:24 -0700 Subject: bcache: RESERVE_PRIO is too small by one when prio_buckets() is a power of two. This patch fixes a cachedev registration-time allocation deadlock. This can deadlock on boot if your initrd auto-registeres bcache devices: Allocator thread: [ 720.727614] INFO: task bcache_allocato:3833 blocked for more than 120 seconds. [ 720.732361] [] schedule+0x37/0x90 [ 720.732963] [] bch_bucket_alloc+0x188/0x360 [bcache] [ 720.733538] [] ? prepare_to_wait_event+0xf0/0xf0 [ 720.734137] [] bch_prio_write+0x19d/0x340 [bcache] [ 720.734715] [] bch_allocator_thread+0x3ff/0x470 [bcache] [ 720.735311] [] ? __schedule+0x2dc/0x950 [ 720.735884] [] ? invalidate_buckets+0x980/0x980 [bcache] Registration thread: [ 720.710403] INFO: task bash:3531 blocked for more than 120 seconds. [ 720.715226] [] schedule+0x37/0x90 [ 720.715805] [] __bch_btree_map_nodes+0x12d/0x150 [bcache] [ 720.716409] [] ? bch_btree_insert_check_key+0x1c0/0x1c0 [bcache] [ 720.717008] [] bch_btree_insert+0xf4/0x170 [bcache] [ 720.717586] [] ? prepare_to_wait_event+0xf0/0xf0 [ 720.718191] [] bch_journal_replay+0x14a/0x290 [bcache] [ 720.718766] [] ? ttwu_do_activate.constprop.94+0x5d/0x70 [ 720.719369] [] ? try_to_wake_up+0x1d4/0x350 [ 720.719968] [] run_cache_set+0x580/0x8e0 [bcache] [ 720.720553] [] register_bcache+0xe2e/0x13b0 [bcache] [ 720.721153] [] kobj_attr_store+0xf/0x20 [ 720.721730] [] sysfs_kf_write+0x3d/0x50 [ 720.722327] [] kernfs_fop_write+0x12a/0x180 [ 720.722904] [] __vfs_write+0x37/0x110 [ 720.723503] [] ? __sb_start_write+0x58/0x110 [ 720.724100] [] ? security_file_permission+0x23/0xa0 [ 720.724675] [] vfs_write+0xa9/0x1b0 [ 720.725275] [] ? do_audit_syscall_entry+0x6c/0x70 [ 720.725849] [] SyS_write+0x55/0xd0 [ 720.726451] [] ? do_page_fault+0x30/0x80 [ 720.727045] [] system_call_fastpath+0x12/0x71 The fifo code in upstream bcache can't use the last element in the buffer, which was the cause of the bug: if you asked for a power of two size, it'd give you a fifo that could hold one less than what you asked for rather than allocating a buffer twice as big. Signed-off-by: Kent Overstreet Tested-by: Eric Wheeler Cc: stable@vger.kernel.org diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 6ada14b..6b93e1b 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1820,7 +1820,7 @@ static int cache_alloc(struct cache *ca) free = roundup_pow_of_two(ca->sb.nbuckets) >> 10; if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) || - !init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || + !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) || !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) || -- cgit v0.10.2 From 90706094d5be614ae7285b3c96c3125bb198618c Mon Sep 17 00:00:00 2001 From: Eric Wheeler Date: Thu, 18 Aug 2016 20:15:26 -0700 Subject: bcache: pr_err: more meaningful error message when nr_stripes is invalid The original error was thought to be corruption, but was actually caused by: make-bcache --data-offset N where N was in bytes and should have been in sectors. While userspace tools should be updated to check --data-offset beyond end of volume, hopefully this will help others that might not have noticed the units. Signed-off-by: Eric Wheeler Cc: Kent Overstreet diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 6b93e1b..849ad44 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -760,7 +760,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, if (!d->nr_stripes || d->nr_stripes > INT_MAX || d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) { - pr_err("nr_stripes too large"); + pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)", + (unsigned)d->nr_stripes); return -ENOMEM; } -- cgit v0.10.2 From 039a392733600d35c80d406a98151b2a9a0a74b4 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Tue, 16 Aug 2016 18:40:18 +0300 Subject: qede: Fix Tx timeout due to xmit_more Driver uses netif_tx_queue_stopped() to make sure the xmit_more indication will be honored, but that only checks for DRV_XOFF. At the same time, it's possible that during transmission the DQL will close the transmission queue with STACK_XOFF indication. In re-configuration flows, when the threshold is relatively low, it's possible that the device has no pending tranmissions, and during tranmission the driver would miss doorbelling the HW. Since there are no pending transmission, there will never be a Tx completion [and thus the DQL would not remove the STACK_XOFF indication], eventually causing the Tx queue to timeout. While we're at it - also doorbell in case driver has to close the transmission queue on its own [although this one is less important - if the ring is full, we're bound to receive completion eventually, which means the doorbell would only be postponed and not indefinetly blocked]. Fixes: 312e06761c99 ("qede: Utilize xmit_more") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index e4bd02e..a6eb6af 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -722,11 +722,14 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, txq->tx_db.data.bd_prod = cpu_to_le16(qed_chain_get_prod_idx(&txq->tx_pbl)); - if (!skb->xmit_more || netif_tx_queue_stopped(netdev_txq)) + if (!skb->xmit_more || netif_xmit_stopped(netdev_txq)) qede_update_tx_producer(txq); if (unlikely(qed_chain_get_elem_left(&txq->tx_pbl) < (MAX_SKB_FRAGS + 1))) { + if (skb->xmit_more) + qede_update_tx_producer(txq); + netif_tx_stop_queue(netdev_txq); DP_VERBOSE(edev, NETIF_MSG_TX_QUEUED, "Stop queue was called\n"); -- cgit v0.10.2 From 1423661fed2c40d6d71b5e2e3aa390f85157f9d5 Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 16 Aug 2016 13:30:36 -0700 Subject: net: thunderx: Fix OOPs with ethtool --register-dump The ethtool_ops .get_regs function attempts to read the nonexistent register NIC_QSET_SQ_0_7_CNM_CHG, which produces a "bus error" type OOPs. Fix by not attempting to read, and removing the definition of, NIC_QSET_SQ_0_7_CNM_CHG. A zero is written into the register dump to keep the layout unchanged. Signed-off-by: David Daney Cc: # 4.4.x- Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h index afb10e3..fab35a5 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_reg.h +++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h @@ -170,7 +170,6 @@ #define NIC_QSET_SQ_0_7_DOOR (0x010838) #define NIC_QSET_SQ_0_7_STATUS (0x010840) #define NIC_QSET_SQ_0_7_DEBUG (0x010848) -#define NIC_QSET_SQ_0_7_CNM_CHG (0x010860) #define NIC_QSET_SQ_0_7_STAT_0_1 (0x010900) #define NIC_QSET_RBDR_0_1_CFG (0x010C00) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index d2d8ef2..ad4fddb 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -382,7 +382,10 @@ static void nicvf_get_regs(struct net_device *dev, p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DOOR, q); p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STATUS, q); p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DEBUG, q); - p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CNM_CHG, q); + /* Padding, was NIC_QSET_SQ_0_7_CNM_CHG, which + * produces bus errors when read + */ + p[i++] = 0; p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1, q); reg_offset = NIC_QSET_SQ_0_7_STAT_0_1 | (1 << 3); p[i++] = nicvf_queue_reg_read(nic, reg_offset, q); -- cgit v0.10.2 From e0d8b2908696d30583ae5764e33332e71cbbccc9 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Wed, 17 Aug 2016 14:09:28 +0530 Subject: cxgb4: Fixes resource allocation for ULD's in kdump kernel At present the code to check in kdump kernel was not disabling allocation of resources when CONFIG_CHELSIO_T4_DCB is defined, move the code outside #defines so that it gets disabled irrespective of #define, when in kdump kernel. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index c45de49..c762a8c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4335,6 +4335,11 @@ static void cfg_queues(struct adapter *adap) #endif int ciq_size; + /* Reduce memory usage in kdump environment, disable all offload. + */ + if (is_kdump_kernel()) + adap->params.offload = 0; + for_each_port(adap, i) n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg); #ifdef CONFIG_CHELSIO_T4_DCB @@ -4365,11 +4370,6 @@ static void cfg_queues(struct adapter *adap) if (q10g > netif_get_num_default_rss_queues()) q10g = netif_get_num_default_rss_queues(); - /* Reduce memory usage in kdump environment, disable all offload. - */ - if (is_kdump_kernel()) - adap->params.offload = 0; - for_each_port(adap, i) { struct port_info *pi = adap2pinfo(adap, i); -- cgit v0.10.2 From bb1fceca22492109be12640d49f5ea5a544c6bb4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Aug 2016 05:56:26 -0700 Subject: tcp: fix use after free in tcp_xmit_retransmit_queue() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When tcp_sendmsg() allocates a fresh and empty skb, it puts it at the tail of the write queue using tcp_add_write_queue_tail() Then it attempts to copy user data into this fresh skb. If the copy fails, we undo the work and remove the fresh skb. Unfortunately, this undo lacks the change done to tp->highest_sack and we can leave a dangling pointer (to a freed skb) Later, tcp_xmit_retransmit_queue() can dereference this pointer and access freed memory. For regular kernels where memory is not unmapped, this might cause SACK bugs because tcp_highest_sack_seq() is buggy, returning garbage instead of tp->snd_nxt, but with various debug features like CONFIG_DEBUG_PAGEALLOC, this can crash the kernel. This bug was found by Marco Grassi thanks to syzkaller. Fixes: 6859d49475d4 ("[TCP]: Abstract tp->highest_sack accessing & point to next skb") Reported-by: Marco Grassi Signed-off-by: Eric Dumazet Cc: Ilpo Järvinen Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Neal Cardwell Reviewed-by: Cong Wang Signed-off-by: David S. Miller diff --git a/include/net/tcp.h b/include/net/tcp.h index c00e7d5..7717302 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1523,6 +1523,8 @@ static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unli { if (sk->sk_send_head == skb_unlinked) sk->sk_send_head = NULL; + if (tcp_sk(sk)->highest_sack == skb_unlinked) + tcp_sk(sk)->highest_sack = NULL; } static inline void tcp_init_send_head(struct sock *sk) -- cgit v0.10.2 From b9f63ae7ba2de2ba19137c5757c0607ce40f3ed5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 17 Aug 2016 15:37:14 +0200 Subject: net: bgmac: fix reversed check for MII registration error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was failing on successful registration returning meaningless errors. Signed-off-by: Rafał Miłecki Fixes: 55954f3bfdac ("net: ethernet: bgmac: move BCMA MDIO Phy code into a separate file") Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index 9a9745c4..625235d 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -159,7 +159,7 @@ static int bgmac_probe(struct bcma_device *core) if (!bgmac_is_bcm4707_family(core)) { mii_bus = bcma_mdio_mii_register(core, bgmac->phyaddr); - if (!IS_ERR(mii_bus)) { + if (IS_ERR(mii_bus)) { err = PTR_ERR(mii_bus); goto err; } -- cgit v0.10.2 From 60bcabd080f53561efa9288be45c128feda1a8bb Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 17 Aug 2016 15:51:55 +0200 Subject: kaweth: fix firmware download This fixes the oops discovered by the Umap2 project and Alan Stern. The intf member needs to be set before the firmware is downloaded. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 770212b..37bf715 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -1029,6 +1029,7 @@ static int kaweth_probe( kaweth = netdev_priv(netdev); kaweth->dev = udev; kaweth->net = netdev; + kaweth->intf = intf; spin_lock_init(&kaweth->device_lock); init_waitqueue_head(&kaweth->term_wait); @@ -1139,8 +1140,6 @@ err_fw: dev_dbg(dev, "Initializing net device.\n"); - kaweth->intf = intf; - kaweth->tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!kaweth->tx_urb) goto err_free_netdev; -- cgit v0.10.2 From 575ced7f8090c1a4e91e2daf8da9352a6a1fc7a7 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 17 Aug 2016 15:51:56 +0200 Subject: kaweth: fix oops upon failed memory allocation Just return an error upon failure. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 37bf715..528b9c9 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -1009,6 +1009,7 @@ static int kaweth_probe( struct net_device *netdev; const eth_addr_t bcast_addr = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; int result = 0; + int rv = -EIO; dev_dbg(dev, "Kawasaki Device Probe (Device number:%d): 0x%4.4x:0x%4.4x:0x%4.4x\n", @@ -1049,6 +1050,10 @@ static int kaweth_probe( /* Download the firmware */ dev_info(dev, "Downloading firmware...\n"); kaweth->firmware_buf = (__u8 *)__get_free_page(GFP_KERNEL); + if (!kaweth->firmware_buf) { + rv = -ENOMEM; + goto err_free_netdev; + } if ((result = kaweth_download_firmware(kaweth, "kaweth/new_code.bin", 100, @@ -1203,7 +1208,7 @@ err_only_tx: err_free_netdev: free_netdev(netdev); - return -EIO; + return rv; } /**************************************************************** -- cgit v0.10.2 From 76507fdfc9b629209ae20cd469da2f6d093a507c Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sun, 7 Aug 2016 21:01:48 +0200 Subject: dmaengine: pxa_dma: fix hotchain corner case In the case where a descriptor is chained on a running channel, and as explained in the comment in the code 10 lines above, the success of the chaining is ensured either if : - the DMA is still running - or if the chained transfer is completed Unfortunately the transfer completness test was done on the descriptor to which the transfer was chained, and not the transfer being chained at the end, ie. hot-chained. This corner case is extremely hard to trigger, as usually the DMA chain is still running, and the first case takes care of returning success of the hot-chaining. It was seen by hot-chaining several "small transfers" to a running "big transfer", not in a real-life usecase but by testing the robustness of the driver. Signed-off-by: Robert Jarzmik Signed-off-by: Vinod Koul diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index dc7850a..2093e52 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -638,7 +638,7 @@ static bool pxad_try_hotchain(struct virt_dma_chan *vc, vd_last_issued = list_entry(vc->desc_issued.prev, struct virt_dma_desc, node); pxad_desc_chain(vd_last_issued, vd); - if (is_chan_running(chan) || is_desc_completed(vd_last_issued)) + if (is_chan_running(chan) || is_desc_completed(vd)) return true; } -- cgit v0.10.2 From 98a384eca9c147f890b5ea31ae91da3769e47e07 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Thu, 18 Aug 2016 12:33:28 +0800 Subject: fib_trie: Fix the description of pos and bits 1) Fix one typo: s/tn/tp/ 2) Fix the description about the "u" bits. Signed-off-by: Xunlei Pang Acked-by: Alexander Duyck Signed-off-by: David S. Miller diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index febca0f..e2ffc2a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -249,7 +249,7 @@ static inline unsigned long get_index(t_key key, struct key_vector *kv) * index into the parent's child array. That is, they will be used to find * 'n' among tp's children. * - * The bits from (n->pos + n->bits) to (tn->pos - 1) - "S" - are skipped bits + * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits * for the node n. * * All the bits we have seen so far are significant to the node n. The rest @@ -258,7 +258,7 @@ static inline unsigned long get_index(t_key key, struct key_vector *kv) * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into * n's child array, and will of course be different for each child. * - * The rest of the bits, from 0 to (n->pos + n->bits), are completely unknown + * The rest of the bits, from 0 to (n->pos -1) - "u" - are completely unknown * at this point. */ -- cgit v0.10.2 From e633fc7a1347528c3b4a6bbdeb41f5d63988242c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 11 Aug 2016 17:44:05 +0100 Subject: iommu/io-pgtable-arm-v7s: Fix attributes when splitting blocks Due to the attribute bits being all over the place in the different types of short-descriptor PTEs, when remapping an existing entry, e.g. splitting a section into pages, we take the approach of decomposing the PTE attributes back to the IOMMU API flags to start from scratch. On inspection, though, the existing code seems to have got the read-only bit backwards and ignored the XN bit. How embarrassing... Fortunately the primary user so far, the Mediatek IOMMU, both never splits blocks (because it only serves non-overlapping DMA API calls) and also ignores permissions anyway, but let's put things right before any future users trip up. Cc: Fixes: e5fc9753b1a8 ("iommu/io-pgtable: Add ARMv7 short descriptor support") Signed-off-by: Robin Murphy Signed-off-by: Will Deacon diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 8c61399..def8ca1 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -286,12 +286,14 @@ static int arm_v7s_pte_to_prot(arm_v7s_iopte pte, int lvl) int prot = IOMMU_READ; arm_v7s_iopte attr = pte >> ARM_V7S_ATTR_SHIFT(lvl); - if (attr & ARM_V7S_PTE_AP_RDONLY) + if (!(attr & ARM_V7S_PTE_AP_RDONLY)) prot |= IOMMU_WRITE; if ((attr & (ARM_V7S_TEX_MASK << ARM_V7S_TEX_SHIFT)) == 0) prot |= IOMMU_MMIO; else if (pte & ARM_V7S_ATTR_C) prot |= IOMMU_CACHE; + if (pte & ARM_V7S_ATTR_XN(lvl)) + prot |= IOMMU_NOEXEC; return prot; } -- cgit v0.10.2 From aea2037e0d3e23c3be1498feae29f71ca997d9e6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 29 Jul 2016 11:15:37 +0100 Subject: iommu/arm-smmu: Fix CMDQ error handling In the unlikely event of a global command queue error, the ARM SMMUv3 driver attempts to convert the problematic command into a CMD_SYNC and resume the command queue. Unfortunately, this code is pretty badly broken: 1. It uses the index into the error string table as the CMDQ index, so we probably read the wrong entry out of the queue 2. The arguments to queue_write are the wrong way round, so we end up writing from the queue onto the stack. These happily cancel out, so the kernel is likely to stay alive, but the command queue will probably fault again when we resume. This patch fixes the error handling code to use the correct queue index and write back the CMD_SYNC to the faulting entry. Cc: Fixes: 48ec83bcbcf5 ("iommu/arm-smmu: Add initial driver support for ARM SMMUv3 devices") Reported-by: Diwakar Subraveti Signed-off-by: Will Deacon diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index ce80117..330623f 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -879,7 +879,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) * We may have concurrent producers, so we need to be careful * not to touch any of the shadow cmdq state. */ - queue_read(cmd, Q_ENT(q, idx), q->ent_dwords); + queue_read(cmd, Q_ENT(q, cons), q->ent_dwords); dev_err(smmu->dev, "skipping command in error state:\n"); for (i = 0; i < ARRAY_SIZE(cmd); ++i) dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]); @@ -890,7 +890,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) return; } - queue_write(cmd, Q_ENT(q, idx), q->ent_dwords); + queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); } static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, -- cgit v0.10.2 From 7611da865c1060b2a7c87a15de663a59035747f8 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 18 Aug 2016 15:41:58 -0700 Subject: irqchip/gicv3-its: Disable the ITS before initializing it When starting a kexec/kdump kernel, the GIC ITS will already have been enabled. According to the ARM Generic Interrupt Controller Architecture Specification (GIC architecture Version 3.0 and version 4.0), writing to GITS_BASER or GITS_CBASER is "UNPREDICTABLE" when the ITS is enabled. On Cavium Thunder systems, this prevents the ITS from being initializing in the kexec/kdump kernel, resulting in failure to register/enable interrupts for all devices. The fix is to disable the ITS if it is not already in the disabled state. This allows the ITS to be properly initialized and then re-enabled in the kexec/kdump kernel. Acked-by: Marc Zyngier Signed-off-by: David Daney Signed-off-by: Marc Zyngier diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 7ceaba8..36b9c28 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1545,7 +1545,12 @@ static int its_force_quiescent(void __iomem *base) u32 val; val = readl_relaxed(base + GITS_CTLR); - if (val & GITS_CTLR_QUIESCENT) + /* + * GIC architecture specification requires the ITS to be both + * disabled and quiescent for writes to GITS_BASER or + * GITS_CBASER to not have UNPREDICTABLE results. + */ + if ((val & GITS_CTLR_QUIESCENT) && !(val & GITS_CTLR_ENABLE)) return 0; /* Disable the generation of all interrupts to this ITS */ -- cgit v0.10.2 From 3714ce1d6655098ee69ede632883e5874d67e4ab Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 5 Aug 2016 19:49:45 +0100 Subject: iommu/arm-smmu: Disable stalling faults for all endpoints Enabling stalling faults can result in hardware deadlock on poorly designed systems, particularly those with a PCI root complex upstream of the SMMU. Although it's not really Linux's job to save hardware integrators from their own misfortune, it *is* our job to stop userspace (e.g. VFIO clients) from hosing the system for everybody else, even if they might already be required to have elevated privileges. Given that the fault handling code currently executes entirely in IRQ context, there is nothing that can sensibly be done to recover from things like page faults anyway, so let's rip this code out for now and avoid the potential for deadlock. Cc: Fixes: 48ec83bcbcf5 ("iommu/arm-smmu: Add initial driver support for ARM SMMUv3 devices") Reported-by: Matt Evans Signed-off-by: Will Deacon diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 4f49fe2..2db74eb 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -686,8 +686,7 @@ static struct iommu_gather_ops arm_smmu_gather_ops = { static irqreturn_t arm_smmu_context_fault(int irq, void *dev) { - int flags, ret; - u32 fsr, fsynr, resume; + u32 fsr, fsynr; unsigned long iova; struct iommu_domain *domain = dev; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -701,34 +700,15 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) if (!(fsr & FSR_FAULT)) return IRQ_NONE; - if (fsr & FSR_IGN) - dev_err_ratelimited(smmu->dev, - "Unexpected context fault (fsr 0x%x)\n", - fsr); - fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0); - flags = fsynr & FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ; - iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR); - if (!report_iommu_fault(domain, smmu->dev, iova, flags)) { - ret = IRQ_HANDLED; - resume = RESUME_RETRY; - } else { - dev_err_ratelimited(smmu->dev, - "Unhandled context fault: iova=0x%08lx, fsynr=0x%x, cb=%d\n", - iova, fsynr, cfg->cbndx); - ret = IRQ_NONE; - resume = RESUME_TERMINATE; - } - - /* Clear the faulting FSR */ - writel(fsr, cb_base + ARM_SMMU_CB_FSR); - /* Retry or terminate any stalled transactions */ - if (fsr & FSR_SS) - writel_relaxed(resume, cb_base + ARM_SMMU_CB_RESUME); + dev_err_ratelimited(smmu->dev, + "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n", + fsr, iova, fsynr, cfg->cbndx); - return ret; + writel(fsr, cb_base + ARM_SMMU_CB_FSR); + return IRQ_HANDLED; } static irqreturn_t arm_smmu_global_fault(int irq, void *dev) @@ -837,7 +817,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, } /* SCTLR */ - reg = SCTLR_CFCFG | SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP; + reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP; if (stage1) reg |= SCTLR_S1_ASIDPNE; #ifdef __BIG_ENDIAN -- cgit v0.10.2 From 5bc0a11664e17e9f9551983f5b660bd48b57483c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 16 Aug 2016 14:29:16 +0100 Subject: iommu/arm-smmu: Don't BUG() if we find aborting STEs with disable_bypass The disable_bypass cmdline option changes the SMMUv3 driver to put down faulting stream table entries by default, as opposed to bypassing transactions from unconfigured devices. In this mode of operation, it is entirely expected to see aborting entries in the stream table if and when we come to installing a valid translation, so don't trigger a BUG() as a result of misdiagnosing these entries as stream table corruption. Cc: Fixes: 48ec83bcbcf5 ("iommu/arm-smmu: Add initial driver support for ARM SMMUv3 devices") Tested-by: Robin Murphy Reported-by: Robin Murphy Reviewed-by: Robin Murphy Signed-off-by: Will Deacon diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 330623f..641e887 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1034,6 +1034,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, case STRTAB_STE_0_CFG_S2_TRANS: ste_live = true; break; + case STRTAB_STE_0_CFG_ABORT: + if (disable_bypass) + break; default: BUG(); /* STE corruption */ } -- cgit v0.10.2 From 7a665d2f60b457c0d77b3e4f01e21c55ffc57069 Mon Sep 17 00:00:00 2001 From: Daniel Verkamp Date: Tue, 28 Jun 2016 11:20:23 -0700 Subject: nvme-fabrics: change NQN UUID to big-endian format NVM Express 1.2.1 section 7.9, NVMe Qualified Names, specifies that the UUID format of NQN uses a UUID based on RFC 4122. RFC 4122 specifies that the UUID is encoded in big-endian byte order. Switch the NVMe over Fabrics host ID field from little-endian UUID to big-endian UUID to match the specification. Signed-off-by: Daniel Verkamp Reviewed-by: Jay Freyensee Signed-off-by: Sagi Grimberg diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 020302c..be0b106 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -56,7 +56,7 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn) kref_init(&host->ref); memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE); - uuid_le_gen(&host->id); + uuid_be_gen(&host->id); list_add_tail(&host->list, &nvmf_hosts); out_unlock: @@ -73,9 +73,9 @@ static struct nvmf_host *nvmf_host_default(void) return NULL; kref_init(&host->ref); - uuid_le_gen(&host->id); + uuid_be_gen(&host->id); snprintf(host->nqn, NVMF_NQN_SIZE, - "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUl", &host->id); + "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id); mutex_lock(&nvmf_hosts_mutex); list_add_tail(&host->list, &nvmf_hosts); @@ -382,7 +382,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) if (!data) return -ENOMEM; - memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_le)); + memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be)); data->cntlid = cpu_to_le16(0xffff); strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE); strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE); @@ -441,7 +441,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) if (!data) return -ENOMEM; - memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_le)); + memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be)); data->cntlid = cpu_to_le16(ctrl->cntlid); strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE); strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 89df52c..46e460a 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -34,7 +34,7 @@ struct nvmf_host { struct kref ref; struct list_head list; char nqn[NVMF_NQN_SIZE]; - uuid_le id; + uuid_be id; }; /** diff --git a/include/linux/nvme.h b/include/linux/nvme.h index d8b37ba..7676557 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -794,7 +794,7 @@ struct nvmf_connect_command { }; struct nvmf_connect_data { - uuid_le hostid; + uuid_be hostid; __le16 cntlid; char resv4[238]; char subsysnqn[NVMF_NQN_FIELD_LEN]; -- cgit v0.10.2 From d1e81428826221d7ff8e4d83db784d099cd232a7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 18 Aug 2016 19:32:59 +0100 Subject: ASoC: core: Clean up DAPM before the card debugfs Both the card and DAPM cleanups recursively delete their debugfs directories. Since the DAPM debugfs subdirectory for the card is located within the card debugfs this means we end up trying to double free the DAPM subdirectory. Reorder the cleanup to free the card debugfs after we've cleaned up DAPM and it has deleted its own subdirectory. Reported-by: Russell King - ARM Linux Tested-by: Russell King Signed-off-by: Mark Brown diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 16369ca..66a33f1 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2083,14 +2083,13 @@ static int soc_cleanup_card_resources(struct snd_soc_card *card) /* remove auxiliary devices */ soc_remove_aux_devices(card); + snd_soc_dapm_free(&card->dapm); soc_cleanup_card_debugfs(card); /* remove the card */ if (card->remove) card->remove(card); - snd_soc_dapm_free(&card->dapm); - snd_card_free(card->snd_card); return 0; -- cgit v0.10.2 From 06777c4ec78a43977b63f1d5045def057227c2c5 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sun, 7 Aug 2016 21:01:49 +0200 Subject: dmaengine: pxa_dma: fix debug message In a very tight timeframe, the debug message in the transfer completion handler can be misleading, as the completion test report can change just after the message, and the code flow cannot be deduced from the debug message. This is just a cleanup to make debugging easier. Signed-off-by: Robert Jarzmik Signed-off-by: Vinod Koul diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index 2093e52..3f56f9c 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -671,6 +671,7 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id) struct virt_dma_desc *vd, *tmp; unsigned int dcsr; unsigned long flags; + bool vd_completed; dma_cookie_t last_started = 0; BUG_ON(!chan); @@ -681,15 +682,17 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id) spin_lock_irqsave(&chan->vc.lock, flags); list_for_each_entry_safe(vd, tmp, &chan->vc.desc_issued, node) { + vd_completed = is_desc_completed(vd); dev_dbg(&chan->vc.chan.dev->device, - "%s(): checking txd %p[%x]: completed=%d\n", - __func__, vd, vd->tx.cookie, is_desc_completed(vd)); + "%s(): checking txd %p[%x]: completed=%d dcsr=0x%x\n", + __func__, vd, vd->tx.cookie, vd_completed, + dcsr); last_started = vd->tx.cookie; if (to_pxad_sw_desc(vd)->cyclic) { vchan_cyclic_callback(vd); break; } - if (is_desc_completed(vd)) { + if (vd_completed) { list_del(&vd->node); vchan_cookie_complete(vd); } else { -- cgit v0.10.2 From 98096d8a787f05b1afe3869aa01e84981915c81d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Aug 2016 11:16:35 -0700 Subject: nvme-fabrics: get a reference when reusing a nvme_host structure Without this we'll get a use after free after connecting two controller using the same hostnqn and then disconnecting one of them. Signed-off-by: Christoph Hellwig Reviewed-by: Jay Freyensee Signed-off-by: Sagi Grimberg diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index be0b106..4eff491 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -47,8 +47,10 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn) mutex_lock(&nvmf_hosts_mutex); host = __nvmf_host_find(hostnqn); - if (host) + if (host) { + kref_get(&host->ref); goto out_unlock; + } host = kmalloc(sizeof(*host), GFP_KERNEL); if (!host) -- cgit v0.10.2 From aa71987472a974f4f6dc4be377720564079ef42e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Aug 2016 11:16:36 -0700 Subject: nvme: fabrics drivers don't need the nvme-pci driver So select the NVME_CORE symbol instead of depending on BLK_DEV_NVME. Signed-off-by: Christoph Hellwig Reviewed-by: Jay Freyensee Signed-off-by: Sagi Grimberg diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index db39d53..0c644f7 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -31,7 +31,7 @@ config NVME_FABRICS config NVME_RDMA tristate "NVM Express over Fabrics RDMA host driver" depends on INFINIBAND - depends on BLK_DEV_NVME + select NVME_CORE select NVME_FABRICS select SG_POOL help diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index a5c31cb..3a5b9d0 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -15,8 +15,8 @@ config NVME_TARGET config NVME_TARGET_LOOP tristate "NVMe loopback device support" - depends on BLK_DEV_NVME depends on NVME_TARGET + select NVME_CORE select NVME_FABRICS select SG_POOL help -- cgit v0.10.2 From 2527ecc9195e9c66252af24c4689e8a67cd4ccb9 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 16 Aug 2016 09:58:25 +0200 Subject: gpio: Fix OF build problem on UM The UserMode (UM) Linux build was failing in gpiolib-of as it requires ioremap()/iounmap() to exist, which is absent from UM. The non-existence of IO memory is negatively defined as CONFIG_NO_IOMEM which means we need to depend on HAS_IOMEM. Cc: stable@vger.kernel.org Cc: Geert Uytterhoeven Reported-by: kbuild test robot Signed-off-by: Linus Walleij diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 98dd47a..24f833c 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -50,6 +50,7 @@ config GPIO_DEVRES config OF_GPIO def_bool y depends on OF + depends on HAS_IOMEM config GPIO_ACPI def_bool y -- cgit v0.10.2 From 048c28c91e56781082bc17d181e460b81e7e8bcb Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 16 Aug 2016 12:07:31 +0200 Subject: gpio: make any OF dependent driver depend on OF_GPIO The drivers that depend on OF but not OF_GPIO are wreaking havoc with the autobuilders for archs that have all requirements for OF but not for OF_GPIO, particularly the UM (Usermode) arch does not have iomem (NO_IOMEM) which result in configuring GPIOLIB but without OF_GPIO which is wrong if the driver is using the .of_node of the gpiochip, which only appears with OF_GPIO. After a brief look at the drivers just depending on OF it seems most if not all of them actually require stuff from gpiolib-of so the dependency is wrong in the first place. This simply patches the Kconfig so that all GPIO drivers using OF depend on OF_GPIO rather than just OF. Cc: Rabin Vincent Cc: Pramod Gurav Cc: Andreas Larsson Cc: Gregory CLEMENT Cc: Thierry Reding Cc: Laxman Dewangan Cc: Alexandre Courbot Cc: Geert Uytterhoeven Cc: Phil Reid Signed-off-by: Linus Walleij diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 24f833c..66a9410 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -189,7 +189,7 @@ config GPIO_EP93XX config GPIO_ETRAXFS bool "Axis ETRAX FS General I/O" depends on CRIS || COMPILE_TEST - depends on OF + depends on OF_GPIO select GPIO_GENERIC select GPIOLIB_IRQCHIP help @@ -215,7 +215,7 @@ config GPIO_GENERIC_PLATFORM config GPIO_GRGPIO tristate "Aeroflex Gaisler GRGPIO support" - depends on OF + depends on OF_GPIO select GPIO_GENERIC select IRQ_DOMAIN help @@ -313,7 +313,7 @@ config GPIO_MPC8XXX config GPIO_MVEBU def_bool y depends on PLAT_ORION - depends on OF + depends on OF_GPIO select GENERIC_IRQ_CHIP config GPIO_MXC @@ -406,7 +406,7 @@ config GPIO_TEGRA bool "NVIDIA Tegra GPIO support" default ARCH_TEGRA depends on ARCH_TEGRA || COMPILE_TEST - depends on OF + depends on OF_GPIO help Say yes here to support GPIO pins on NVIDIA Tegra SoCs. @@ -1100,7 +1100,7 @@ menu "SPI GPIO expanders" config GPIO_74X164 tristate "74x164 serial-in/parallel-out 8-bits shift register" - depends on OF + depends on OF_GPIO help Driver for 74x164 compatible serial-in/parallel-out 8-outputs shift registers. This driver can be used to provide access -- cgit v0.10.2 From a305a4387acb01cecadeeea5151c049a022a1bfc Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Wed, 17 Aug 2016 16:14:59 +0100 Subject: thermal: cpu_cooling: Fix NULL dereference in cpufreq_state2power Currently all CPU cooling devices share a `struct thermal_cooling_device_ops` instance. The thermal core uses the presence of functions in this struct to determine if a cooling device has a power model (see cdev_is_power_actor). cpu_cooling.c adds the power model functions to the shared struct when a device is registered with a power model. Therefore, if a CPU cooling device is registered using [of_]cpufreq_power_cooling_register, _all_ devices will be determined to have a power model, including any registered with [of_]cpufreq_cooling_register. This can result in cpufreq_state2power being called on a device where dyn_power_table is NULL. With this commit, instead of having a shared thermal_cooling_device_ops which is mutated, we have two versions: one with the power functions and one without. Signed-off-by: Brendan Jackman Cc: Amit Daniel Kachhap Cc: Viresh Kumar Cc: Javi Merino Acked-by: Viresh Kumar Acked-by: Javi Merino Signed-off-by: Zhang Rui diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 3788ed7..a32b417 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -740,12 +740,22 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev, } /* Bind cpufreq callbacks to thermal cooling device ops */ + static struct thermal_cooling_device_ops cpufreq_cooling_ops = { .get_max_state = cpufreq_get_max_state, .get_cur_state = cpufreq_get_cur_state, .set_cur_state = cpufreq_set_cur_state, }; +static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = { + .get_max_state = cpufreq_get_max_state, + .get_cur_state = cpufreq_get_cur_state, + .set_cur_state = cpufreq_set_cur_state, + .get_requested_power = cpufreq_get_requested_power, + .state2power = cpufreq_state2power, + .power2state = cpufreq_power2state, +}; + /* Notifier for cpufreq policy change */ static struct notifier_block thermal_cpufreq_notifier_block = { .notifier_call = cpufreq_thermal_notifier, @@ -795,6 +805,7 @@ __cpufreq_cooling_register(struct device_node *np, struct cpumask temp_mask; unsigned int freq, i, num_cpus; int ret; + struct thermal_cooling_device_ops *cooling_ops; cpumask_and(&temp_mask, clip_cpus, cpu_online_mask); policy = cpufreq_cpu_get(cpumask_first(&temp_mask)); @@ -850,10 +861,6 @@ __cpufreq_cooling_register(struct device_node *np, cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus); if (capacitance) { - cpufreq_cooling_ops.get_requested_power = - cpufreq_get_requested_power; - cpufreq_cooling_ops.state2power = cpufreq_state2power; - cpufreq_cooling_ops.power2state = cpufreq_power2state; cpufreq_dev->plat_get_static_power = plat_static_func; ret = build_dyn_power_table(cpufreq_dev, capacitance); @@ -861,6 +868,10 @@ __cpufreq_cooling_register(struct device_node *np, cool_dev = ERR_PTR(ret); goto free_table; } + + cooling_ops = &cpufreq_power_cooling_ops; + } else { + cooling_ops = &cpufreq_cooling_ops; } ret = get_idr(&cpufreq_idr, &cpufreq_dev->id); @@ -885,7 +896,7 @@ __cpufreq_cooling_register(struct device_node *np, cpufreq_dev->id); cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev, - &cpufreq_cooling_ops); + cooling_ops); if (IS_ERR(cool_dev)) goto remove_idr; -- cgit v0.10.2 From 57027db00d1094d0abd5776899b00ca55d42d37c Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 15 Aug 2016 11:22:40 +0200 Subject: Thermal-INT3406: Delete owner assignment The field "owner" is set by core. Thus delete an extra initialisation. Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: Markus Elfring Signed-off-by: Zhang Rui diff --git a/drivers/thermal/int340x_thermal/int3406_thermal.c b/drivers/thermal/int340x_thermal/int3406_thermal.c index a578cd2..1891f34 100644 --- a/drivers/thermal/int340x_thermal/int3406_thermal.c +++ b/drivers/thermal/int340x_thermal/int3406_thermal.c @@ -225,7 +225,6 @@ static struct platform_driver int3406_thermal_driver = { .remove = int3406_thermal_remove, .driver = { .name = "int3406 thermal", - .owner = THIS_MODULE, .acpi_match_table = int3406_thermal_match, }, }; -- cgit v0.10.2 From 55f2ac33adc78d429c470c9ca05e18c36dc24922 Mon Sep 17 00:00:00 2001 From: Caesar Wang Date: Tue, 3 May 2016 20:07:41 +0800 Subject: thermal: trivial: fix the typo See the thermal code, the obvious typo from my editor. Signed-off-by: Caesar Wang Signed-off-by: Eduardo Valentin Signed-off-by: Zhang Rui diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt index 41b817f..88b6ea1 100644 --- a/Documentation/devicetree/bindings/thermal/thermal.txt +++ b/Documentation/devicetree/bindings/thermal/thermal.txt @@ -62,7 +62,7 @@ For more examples of cooling devices, refer to the example sections below. Required properties: - #cooling-cells: Used to provide cooling device specific information Type: unsigned while referring to it. Must be at least 2, in order - Size: one cell to specify minimum and maximum cooling state used + Size: one cell to specify minimum and maximum cooling state used in the reference. The first cell is the minimum cooling state requested and the second cell is the maximum cooling state requested in the reference. @@ -119,7 +119,7 @@ Required properties: Optional property: - contribution: The cooling contribution to the thermal zone of the Type: unsigned referred cooling device at the referred trip point. - Size: one cell The contribution is a ratio of the sum + Size: one cell The contribution is a ratio of the sum of all cooling contributions within a thermal zone. Note: Using the THERMAL_NO_LIMIT (-1UL) constant in the cooling-device phandle @@ -145,7 +145,7 @@ Required properties: Size: one cell - thermal-sensors: A list of thermal sensor phandles and sensor specifier - Type: list of used while monitoring the thermal zone. + Type: list of used while monitoring the thermal zone. phandles + sensor specifier @@ -473,7 +473,7 @@ thermal-zones { <&adc>; /* pcb north */ /* hotspot = 100 * bandgap - 120 * adc + 484 */ - coefficients = <100 -120 484>; + coefficients = <100 -120 484>; trips { ... @@ -502,7 +502,7 @@ from the ADC sensor. The binding would be then: thermal-sensors = <&adc>; /* hotspot = 1 * adc + 6000 */ - coefficients = <1 6000>; + coefficients = <1 6000>; (d) - Board thermal -- cgit v0.10.2 From 829bc78aa7628e81a9de717316b85cbee3c5eb86 Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Tue, 16 Aug 2016 10:51:38 +0200 Subject: thermal: imx: fix a possible NULL dereference of_match_device could return NULL, and so cause a NULL pointer dereference later at line 472: data->socdata = of_id->data; For fixing this problem, we use of_device_get_match_data(), this will simplify the code a little by using a standard function for getting the match data. Reported-by: coverity (CID 1324128) Signed-off-by: LABBE Corentin Signed-off-by: Zhang Rui diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c index c5547bd..e473548 100644 --- a/drivers/thermal/imx_thermal.c +++ b/drivers/thermal/imx_thermal.c @@ -471,8 +471,6 @@ MODULE_DEVICE_TABLE(of, of_imx_thermal_match); static int imx_thermal_probe(struct platform_device *pdev) { - const struct of_device_id *of_id = - of_match_device(of_imx_thermal_match, &pdev->dev); struct imx_thermal_data *data; struct regmap *map; int measure_freq; @@ -490,7 +488,7 @@ static int imx_thermal_probe(struct platform_device *pdev) } data->tempmon = map; - data->socdata = of_id->data; + data->socdata = of_device_get_match_data(&pdev->dev); /* make sure the IRQ flag is clear before enabling irq on i.MX6SX */ if (data->socdata->version == TEMPMON_IMX6SX) { -- cgit v0.10.2 From 21eb45db282317543ca46c821bbb8d5075e02cbe Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 19 Aug 2016 09:34:24 +0300 Subject: ASoC: omap-abe-twl6040: Correct dmic-codec device registration The dmic-codec was registered within the platform_driver's probe function, which can cause deferred probe to run in loops as reported and analyzed by Russell King. Use module_init/exit in the driver and handle the dmic-codec device registration and removal at that level instead of the platform_driver probe/remove. Signed-off-by: Peter Ujfalusi Reported-by: Russell King Tested-by: Russell King Signed-off-by: Mark Brown diff --git a/sound/soc/omap/omap-abe-twl6040.c b/sound/soc/omap/omap-abe-twl6040.c index 0843a68..f61b3b5 100644 --- a/sound/soc/omap/omap-abe-twl6040.c +++ b/sound/soc/omap/omap-abe-twl6040.c @@ -38,10 +38,10 @@ struct abe_twl6040 { int jack_detection; /* board can detect jack events */ int mclk_freq; /* MCLK frequency speed for twl6040 */ - - struct platform_device *dmic_codec_dev; }; +struct platform_device *dmic_codec_dev; + static int omap_abe_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { @@ -258,8 +258,6 @@ static int omap_abe_probe(struct platform_device *pdev) if (priv == NULL) return -ENOMEM; - priv->dmic_codec_dev = ERR_PTR(-EINVAL); - if (snd_soc_of_parse_card_name(card, "ti,model")) { dev_err(&pdev->dev, "Card name is not provided\n"); return -ENODEV; @@ -284,13 +282,6 @@ static int omap_abe_probe(struct platform_device *pdev) num_links = 2; abe_twl6040_dai_links[1].cpu_of_node = dai_node; abe_twl6040_dai_links[1].platform_of_node = dai_node; - - priv->dmic_codec_dev = platform_device_register_simple( - "dmic-codec", -1, NULL, 0); - if (IS_ERR(priv->dmic_codec_dev)) { - dev_err(&pdev->dev, "Can't instantiate dmic-codec\n"); - return PTR_ERR(priv->dmic_codec_dev); - } } else { num_links = 1; } @@ -299,16 +290,14 @@ static int omap_abe_probe(struct platform_device *pdev) of_property_read_u32(node, "ti,mclk-freq", &priv->mclk_freq); if (!priv->mclk_freq) { dev_err(&pdev->dev, "MCLK frequency not provided\n"); - ret = -EINVAL; - goto err_unregister; + return -EINVAL; } card->fully_routed = 1; if (!priv->mclk_freq) { dev_err(&pdev->dev, "MCLK frequency missing\n"); - ret = -ENODEV; - goto err_unregister; + return -ENODEV; } card->dai_link = abe_twl6040_dai_links; @@ -317,17 +306,9 @@ static int omap_abe_probe(struct platform_device *pdev) snd_soc_card_set_drvdata(card, priv); ret = snd_soc_register_card(card); - if (ret) { + if (ret) dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", ret); - goto err_unregister; - } - - return 0; - -err_unregister: - if (!IS_ERR(priv->dmic_codec_dev)) - platform_device_unregister(priv->dmic_codec_dev); return ret; } @@ -335,13 +316,9 @@ err_unregister: static int omap_abe_remove(struct platform_device *pdev) { struct snd_soc_card *card = platform_get_drvdata(pdev); - struct abe_twl6040 *priv = snd_soc_card_get_drvdata(card); snd_soc_unregister_card(card); - if (!IS_ERR(priv->dmic_codec_dev)) - platform_device_unregister(priv->dmic_codec_dev); - return 0; } @@ -361,7 +338,33 @@ static struct platform_driver omap_abe_driver = { .remove = omap_abe_remove, }; -module_platform_driver(omap_abe_driver); +static int __init omap_abe_init(void) +{ + int ret; + + dmic_codec_dev = platform_device_register_simple("dmic-codec", -1, NULL, + 0); + if (IS_ERR(dmic_codec_dev)) { + pr_err("%s: dmic-codec device registration failed\n", __func__); + return PTR_ERR(dmic_codec_dev); + } + + ret = platform_driver_register(&omap_abe_driver); + if (ret) { + pr_err("%s: platform driver registration failed\n", __func__); + platform_device_unregister(dmic_codec_dev); + } + + return ret; +} +module_init(omap_abe_init); + +static void __exit omap_abe_exit(void) +{ + platform_driver_unregister(&omap_abe_driver); + platform_device_unregister(dmic_codec_dev); +} +module_exit(omap_abe_exit); MODULE_AUTHOR("Misael Lopez Cruz "); MODULE_DESCRIPTION("ALSA SoC for OMAP boards with ABE and twl6040 codec"); -- cgit v0.10.2 From ba913e4f72fc9cfd03dad968dfb110eb49211d80 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 19 Aug 2016 14:30:29 +0100 Subject: MIPS: KVM: Check for pfn noslot case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When mapping a page into the guest we error check using is_error_pfn(), however this doesn't detect a value of KVM_PFN_NOSLOT, indicating an error HVA for the page. This can only happen on MIPS right now due to unusual memslot management (e.g. being moved / removed / resized), or with an Enhanced Virtual Memory (EVA) configuration where the default KVM_HVA_ERR_* and kvm_is_error_hva() definitions are unsuitable (fixed in a later patch). This case will be treated as a pfn of zero, mapping the first page of physical memory into the guest. It would appear the MIPS KVM port wasn't updated prior to being merged (in v3.10) to take commit 81c52c56e2b4 ("KVM: do not treat noslot pfn as a error pfn") into account (merged v3.8), which converted a bunch of is_error_pfn() calls to is_error_noslot_pfn(). Switch to using is_error_noslot_pfn() instead to catch this case properly. Fixes: 858dd5d45733 ("KVM/MIPS32: MMU/TLB operations for the Guest.") Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: # 3.10.y- Signed-off-by: Paolo Bonzini diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 6cfdcf5..121008c 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -40,7 +40,7 @@ static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn) srcu_idx = srcu_read_lock(&kvm->srcu); pfn = gfn_to_pfn(kvm, gfn); - if (is_error_pfn(pfn)) { + if (is_error_noslot_pfn(pfn)) { kvm_err("Couldn't get pfn for gfn %#llx!\n", gfn); err = -EFAULT; goto out; -- cgit v0.10.2 From 13f479b9df4e2bbf2d16e7e1b02f3f55f70e2455 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 17 Aug 2016 09:46:42 +0200 Subject: drm/radeon: fix radeon_move_blit on 32bit systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This bug seems to be present for a very long time. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 0c00e19..c2e0a1c 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -263,8 +263,8 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, rdev = radeon_get_rdev(bo->bdev); ridx = radeon_copy_ring_index(rdev); - old_start = old_mem->start << PAGE_SHIFT; - new_start = new_mem->start << PAGE_SHIFT; + old_start = (u64)old_mem->start << PAGE_SHIFT; + new_start = (u64)new_mem->start << PAGE_SHIFT; switch (old_mem->mem_type) { case TTM_PL_VRAM: -- cgit v0.10.2 From 815d27a46f3119f74fe01fe10bf683aa5bc55597 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 17 Aug 2016 09:45:25 +0200 Subject: drm/amdgpu: fix amdgpu_move_blit on 32bit systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This bug seems to be present for a very long time. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 9b61c8b..141bfca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -251,8 +251,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, adev = amdgpu_get_adev(bo->bdev); ring = adev->mman.buffer_funcs_ring; - old_start = old_mem->start << PAGE_SHIFT; - new_start = new_mem->start << PAGE_SHIFT; + old_start = (u64)old_mem->start << PAGE_SHIFT; + new_start = (u64)new_mem->start << PAGE_SHIFT; switch (old_mem->mem_type) { case TTM_PL_VRAM: -- cgit v0.10.2 From 847927bb3db2b18744ddec1b4fbb274ac7e05199 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 17 Aug 2016 14:10:37 +0200 Subject: drm/amdgpu: fix sdma_v2_4_ring_test_ib MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Typo in checking the return code. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 1351c7e..a64715d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -714,7 +714,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; - } else if (r) { + } else if (r < 0) { DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } -- cgit v0.10.2 From bdf001374bfe12677ae6ec5076452493fb682012 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 16 Aug 2016 19:52:35 +0200 Subject: drm/amdgpu: fix timeout value check in amd_sched_job_recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Could be that we don't actually have a timeout set. Signed-off-by: Christian König Acked-by: Alex Deucher Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index ef312bb..963a24d 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -405,7 +405,7 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched) spin_lock(&sched->job_list_lock); s_job = list_first_entry_or_null(&sched->ring_mirror_list, struct amd_sched_job, node); - if (s_job) + if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT) schedule_delayed_work(&s_job->work_tdr, sched->timeout); list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { -- cgit v0.10.2 From 6c647b0eb01cd7326dca093590f5e123e3c68b9c Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Fri, 1 Jul 2016 15:45:57 -0400 Subject: xen-blkfront: fix places not updated after introducing 64KB page granularity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two places didn't get updated when 64KB page granularity was introduced, this patch fix them. Signed-off-by: Bob Liu Acked-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index be4fea6..6a1756d 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1315,7 +1315,7 @@ free_shadow: rinfo->ring_ref[i] = GRANT_INVALID_REF; } } - free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE)); + free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE)); rinfo->ring.sring = NULL; if (rinfo->irq) @@ -2008,7 +2008,7 @@ static int blkif_recover(struct blkfront_info *info) blkfront_gather_backend_features(info); segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; - blk_queue_max_segments(info->rq, segs); + blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG); for (r_index = 0; r_index < info->nr_rings; r_index++) { struct blkfront_ring_info *rinfo = &info->rinfo[r_index]; -- cgit v0.10.2 From 172335ada40ce26806e514c83a504b45c14a4139 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Fri, 1 Jul 2016 17:43:39 -0400 Subject: xen-blkfront: introduce blkif_set_queue_limits() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit blk_mq_update_nr_hw_queues() reset all queue limits to default which it's not as xen-blkfront expected, introducing blkif_set_queue_limits() to reset limits with initial correct values. Signed-off-by: Bob Liu Acked-by: Roger Pau Monné Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 6a1756d..f84e220 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -189,6 +189,8 @@ struct blkfront_info struct mutex mutex; struct xenbus_device *xbdev; struct gendisk *gd; + u16 sector_size; + unsigned int physical_sector_size; int vdevice; blkif_vdev_t handle; enum blkif_state connected; @@ -910,9 +912,45 @@ static struct blk_mq_ops blkfront_mq_ops = { .map_queue = blk_mq_map_queue, }; +static void blkif_set_queue_limits(struct blkfront_info *info) +{ + struct request_queue *rq = info->rq; + struct gendisk *gd = info->gd; + unsigned int segments = info->max_indirect_segments ? : + BLKIF_MAX_SEGMENTS_PER_REQUEST; + + queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); + + if (info->feature_discard) { + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); + blk_queue_max_discard_sectors(rq, get_capacity(gd)); + rq->limits.discard_granularity = info->discard_granularity; + rq->limits.discard_alignment = info->discard_alignment; + if (info->feature_secdiscard) + queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq); + } + + /* Hard sector size and max sectors impersonate the equiv. hardware. */ + blk_queue_logical_block_size(rq, info->sector_size); + blk_queue_physical_block_size(rq, info->physical_sector_size); + blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512); + + /* Each segment in a request is up to an aligned page in size. */ + blk_queue_segment_boundary(rq, PAGE_SIZE - 1); + blk_queue_max_segment_size(rq, PAGE_SIZE); + + /* Ensure a merged request will fit in a single I/O ring slot. */ + blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG); + + /* Make sure buffer addresses are sector-aligned. */ + blk_queue_dma_alignment(rq, 511); + + /* Make sure we don't use bounce buffers. */ + blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY); +} + static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, - unsigned int physical_sector_size, - unsigned int segments) + unsigned int physical_sector_size) { struct request_queue *rq; struct blkfront_info *info = gd->private_data; @@ -944,36 +982,11 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, } rq->queuedata = info; - queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); - - if (info->feature_discard) { - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); - blk_queue_max_discard_sectors(rq, get_capacity(gd)); - rq->limits.discard_granularity = info->discard_granularity; - rq->limits.discard_alignment = info->discard_alignment; - if (info->feature_secdiscard) - queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq); - } - - /* Hard sector size and max sectors impersonate the equiv. hardware. */ - blk_queue_logical_block_size(rq, sector_size); - blk_queue_physical_block_size(rq, physical_sector_size); - blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512); - - /* Each segment in a request is up to an aligned page in size. */ - blk_queue_segment_boundary(rq, PAGE_SIZE - 1); - blk_queue_max_segment_size(rq, PAGE_SIZE); - - /* Ensure a merged request will fit in a single I/O ring slot. */ - blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG); - - /* Make sure buffer addresses are sector-aligned. */ - blk_queue_dma_alignment(rq, 511); - - /* Make sure we don't use bounce buffers. */ - blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY); - - gd->queue = rq; + info->rq = gd->queue = rq; + info->gd = gd; + info->sector_size = sector_size; + info->physical_sector_size = physical_sector_size; + blkif_set_queue_limits(info); return 0; } @@ -1136,16 +1149,11 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, gd->private_data = info; set_capacity(gd, capacity); - if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size, - info->max_indirect_segments ? : - BLKIF_MAX_SEGMENTS_PER_REQUEST)) { + if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) { del_gendisk(gd); goto release; } - info->rq = gd->queue; - info->gd = gd; - xlvbd_flush(info); if (vdisk_info & VDISK_READONLY) @@ -2007,6 +2015,8 @@ static int blkif_recover(struct blkfront_info *info) struct split_bio *split_bio; blkfront_gather_backend_features(info); + /* Reset limits changed by blk_mq_update_nr_hw_queues(). */ + blkif_set_queue_limits(info); segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG); -- cgit v0.10.2 From 4e876c2bd37fbb5c37a4554a79cf979d486f0e82 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Wed, 27 Jul 2016 17:42:04 +0800 Subject: xen-blkfront: free resources if xlvbd_alloc_gendisk fails Current code forgets to free resources in the failure path of xlvbd_alloc_gendisk(), this patch fix it. Signed-off-by: Bob Liu Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index f84e220..88ef6d4 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2442,7 +2442,7 @@ static void blkfront_connect(struct blkfront_info *info) if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", info->xbdev->otherend); - return; + goto fail; } xenbus_switch_state(info->xbdev, XenbusStateConnected); @@ -2455,6 +2455,11 @@ static void blkfront_connect(struct blkfront_info *info) device_add_disk(&info->xbdev->dev, info->gd); info->is_ready = 1; + return; + +fail: + blkif_free(info, 0); + return; } /** -- cgit v0.10.2 From 47af45d684b5f3ae000ad448db02ce4f13f73273 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 16 Aug 2016 17:38:54 -0700 Subject: Input: i8042 - set up shared ps2_cmd_mutex for AUX ports The commit 4097461897df ("Input: i8042 - break load dependency ...") correctly set up ps2_cmd_mutex pointer for the KBD port but forgot to do the same for AUX port(s), which results in communication on KBD and AUX ports to clash with each other. Fixes: 4097461897df ("Input: i8042 - break load dependency ...") Reported-by: Bruno Wolff III Tested-by: Bruno Wolff III Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index b4d3408..405252a 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -1305,6 +1305,7 @@ static int __init i8042_create_aux_port(int idx) serio->write = i8042_aux_write; serio->start = i8042_start; serio->stop = i8042_stop; + serio->ps2_cmd_mutex = &i8042_mutex; serio->port_data = port; serio->dev.parent = &i8042_platform_device->dev; if (idx < 0) { -- cgit v0.10.2 From 86147e3cfa5e118b61e78f4f0bf29e920dcbd477 Mon Sep 17 00:00:00 2001 From: Liav Rehana Date: Tue, 16 Aug 2016 10:55:35 +0300 Subject: ARC: use correct offset in pt_regs for saving/restoring user mode r25 User mode callee regs are explicitly collected before signal delivery or breakpoint trap. r25 is special for kernel as it serves as task pointer, so user mode value is clobbered very early. It is saved in pt_regs where generally only scratch (aka caller saved) regs are saved. The code to access the corresponding pt_regs location had a subtle bug as it was using load/store with scaling of offset, whereas the offset was already byte wise correct. So fix this by replacing LD.AS with a standard LD Cc: Signed-off-by: Liav Rehana Reviewed-by: Alexey Brodkin [vgupta: rewrote title and commit log] Signed-off-by: Vineet Gupta diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index ad7860c..51597f3 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -142,7 +142,7 @@ #ifdef CONFIG_ARC_CURR_IN_REG ; Retrieve orig r25 and save it with rest of callee_regs - ld.as r12, [r12, PT_user_r25] + ld r12, [r12, PT_user_r25] PUSH r12 #else PUSH r25 @@ -198,7 +198,7 @@ ; SP is back to start of pt_regs #ifdef CONFIG_ARC_CURR_IN_REG - st.as r12, [sp, PT_user_r25] + st r12, [sp, PT_user_r25] #endif .endm -- cgit v0.10.2 From 840c054fd0efb048df6fceb0c46385ec5b66dfe6 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 10 Aug 2016 14:10:57 -0700 Subject: ARC: Support syscall ABI v4 The syscall ABI includes the gcc functional calling ABI since a syscall implies userland caller and kernel callee. The current gcc ABI (v3) for ARCv2 ISA required 64-bit data be passed in even-odd register pairs, (potentially punching reg holes when passing such values as args). This was partly driven by the fact that the double-word LDD/STD instructions in ARCv2 expect the register alignment and thus gcc forcing this avoids extra MOV at the cost of a few unused register (which we have plenty anyways). This however was rejected as part of upstreaming gcc port to HS. So the new ABI v4 doesn't enforce the even-odd reg restriction. Do note that for ARCompact ISA builds v3 and v4 are practically the same in terms of gcc code generation. In terms of change management, we infer the new ABI if gcc 6.x onwards is used for building the kernel. This also needs a stable backport to enable older kernels to work with new tools/user-space Cc: Signed-off-by: Vineet Gupta diff --git a/arch/arc/include/uapi/asm/elf.h b/arch/arc/include/uapi/asm/elf.h index 0f99ac8..0037a58 100644 --- a/arch/arc/include/uapi/asm/elf.h +++ b/arch/arc/include/uapi/asm/elf.h @@ -13,8 +13,15 @@ /* Machine specific ELF Hdr flags */ #define EF_ARC_OSABI_MSK 0x00000f00 -#define EF_ARC_OSABI_ORIG 0x00000000 /* MUST be zero for back-compat */ -#define EF_ARC_OSABI_CURRENT 0x00000300 /* v3 (no legacy syscalls) */ + +#define EF_ARC_OSABI_V3 0x00000300 /* v3 (no legacy syscalls) */ +#define EF_ARC_OSABI_V4 0x00000400 /* v4 (64bit data any reg align) */ + +#if __GNUC__ < 6 +#define EF_ARC_OSABI_CURRENT EF_ARC_OSABI_V3 +#else +#define EF_ARC_OSABI_CURRENT EF_ARC_OSABI_V4 +#endif typedef unsigned long elf_greg_t; typedef unsigned long elf_fpregset_t; diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index b5db9e7..be1972b 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -199,7 +199,7 @@ int elf_check_arch(const struct elf32_hdr *x) } eflags = x->e_flags; - if ((eflags & EF_ARC_OSABI_MSK) < EF_ARC_OSABI_CURRENT) { + if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) { pr_err("ABI mismatch - you need newer toolchain\n"); force_sigsegv(SIGSEGV, current); return 0; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index a946400..f52a0d0d 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -291,8 +291,10 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) cpu->dccm.base_addr, TO_KB(cpu->dccm.sz), cpu->iccm.base_addr, TO_KB(cpu->iccm.sz)); - n += scnprintf(buf + n, len - n, - "OS ABI [v3]\t: no-legacy-syscalls\n"); + n += scnprintf(buf + n, len - n, "OS ABI [v%d]\t: %s\n", + EF_ARC_OSABI_CURRENT >> 8, + EF_ARC_OSABI_CURRENT == EF_ARC_OSABI_V3 ? + "no-legacy-syscalls" : "64-bit data any register aligned"); return buf; } -- cgit v0.10.2 From d77976c414ed7f521b9c79b2a9dde0147a3cf754 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 17 Aug 2016 17:34:46 -0700 Subject: ARC: export kmap | MODPOST 7 modules | ERROR: "kmap" [fs/ext2/ext2.ko] undefined! | ../scripts/Makefile.modpost:91: recipe for target '__modpost' failed Cc: Signed-off-by: Vineet Gupta diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c index 04f8332..77ff64a 100644 --- a/arch/arc/mm/highmem.c +++ b/arch/arc/mm/highmem.c @@ -61,6 +61,7 @@ void *kmap(struct page *page) return kmap_high(page); } +EXPORT_SYMBOL(kmap); void *kmap_atomic(struct page *page) { -- cgit v0.10.2 From 1c3c909303924d30145601f47b6c058fdd2cbc2e Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 16 Aug 2016 18:27:07 -0700 Subject: ARC: mm: fix build breakage with STRICT_MM_TYPECHECKS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit | CC mm/memory.o | In file included from ../mm/memory.c:53:0: | ../include/linux/pfn_t.h: In function ‘pfn_t_pte’: | ../include/linux/pfn_t.h:78:2: error: conversion to non-scalar type requested | return pfn_pte(pfn_t_to_pfn(pfn), pgprot); With STRICT_MM_TYPECHECKS pte_t is a struct and the offending code forces a cast which ends up shifting a struct and hence the gcc warning. Note that in recent past some of the arches (aarch64, s390) made STRICT_MM_TYPECHECKS default, but we don't for ARC as this leads to slightly worse generated code, given ARC ABI definition of returning structs (which pte_t would become) Quoting from ARC ABI... "Results of type struct are returned in a caller-supplied temporary variable whose address is passed in r0. For such functions, the arguments are shifted so that they are passed in r1 and up." So - struct to be returned would be allocated on stack requiring extra code at call sites - callee updates stack memory to facilitate the return (vs. simple MOV into return reg r0) Hence STRICT_MM_TYPECHECKS is not enabled by default for ARC Cc: #4.4+ Signed-off-by: Vineet Gupta diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 0f92d97..89eeb37 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -280,7 +280,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) #define pte_page(pte) pfn_to_page(pte_pfn(pte)) #define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) -#define pfn_pte(pfn, prot) (__pte(((pte_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) /* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/ #define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) -- cgit v0.10.2 From 6040e57658eee6eb1315a26119101ca832d1f854 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 19 Aug 2016 12:47:01 -0700 Subject: Make the hardened user-copy code depend on having a hardened allocator The kernel test robot reported a usercopy failure in the new hardened sanity checks, due to a page-crossing copy of the FPU state into the task structure. This happened because the kernel test robot was testing with SLOB, which doesn't actually do the required book-keeping for slab allocations, and as a result the hardening code didn't realize that the task struct allocation was one single allocation - and the sanity checks fail. Since SLOB doesn't even claim to support hardening (and you really shouldn't use it), the straightforward solution is to just make the usercopy hardening code depend on the allocator supporting it. Reported-by: kernel test robot Cc: Kees Cook Signed-off-by: Linus Torvalds diff --git a/security/Kconfig b/security/Kconfig index df28f2b..da10d9b 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -136,6 +136,7 @@ config HAVE_ARCH_HARDENED_USERCOPY config HARDENED_USERCOPY bool "Harden memory copies between kernel and userspace" depends on HAVE_ARCH_HARDENED_USERCOPY + depends on HAVE_HARDENED_USERCOPY_ALLOCATOR select BUG help This option checks for obviously wrong memory regions when -- cgit v0.10.2 From 9a0fe86745b8e95f7ea39933a956f5771332c430 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Aug 2016 15:33:12 -0400 Subject: pNFS: Handle NFS4ERR_OLD_STATEID correctly in LAYOUTSTAT calls We normally want to update the stateid and then retry, Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 6f47527..64b43b4 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -318,10 +318,22 @@ static void nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) { struct nfs42_layoutstat_data *data = calldata; - struct nfs_server *server = NFS_SERVER(data->args.inode); + struct inode *inode = data->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct pnfs_layout_hdr *lo; + spin_lock(&inode->i_lock); + lo = NFS_I(inode)->layout; + if (!pnfs_layout_is_valid(lo)) { + spin_unlock(&inode->i_lock); + rpc_exit(task, 0); + return; + } + nfs4_stateid_copy(&data->args.stateid, &lo->plh_stateid); + spin_unlock(&inode->i_lock); nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args, &data->res.seq_res, task); + } static void @@ -341,11 +353,11 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_OLD_STATEID: case -NFS4ERR_BAD_STATEID: spin_lock(&inode->i_lock); lo = NFS_I(inode)->layout; - if (lo && nfs4_stateid_match(&data->args.stateid, + if (pnfs_layout_is_valid(lo) && + nfs4_stateid_match(&data->args.stateid, &lo->plh_stateid)) { LIST_HEAD(head); @@ -359,11 +371,23 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) } else spin_unlock(&inode->i_lock); break; + case -NFS4ERR_OLD_STATEID: + spin_lock(&inode->i_lock); + lo = NFS_I(inode)->layout; + if (pnfs_layout_is_valid(lo) && + nfs4_stateid_match_other(&data->args.stateid, + &lo->plh_stateid)) { + /* Do we need to delay before resending? */ + if (!nfs4_stateid_is_newer(&lo->plh_stateid, + &data->args.stateid)) + rpc_delay(task, HZ); + rpc_restart_call_prepare(task); + } + spin_unlock(&inode->i_lock); + break; case -ENOTSUPP: case -EOPNOTSUPP: NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS; - default: - break; } dprintk("%s server returns %d\n", __func__, task->tk_status); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 70806ca..bf98f1b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2510,7 +2510,6 @@ pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags) data->args.fh = NFS_FH(inode); data->args.inode = inode; - nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid); status = ld->prepare_layoutstats(&data->args); if (status) goto out_free; -- cgit v0.10.2 From c57653dc94d0db7bf63067433ceaa97bdcd0a312 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 19 Aug 2016 13:59:02 -0700 Subject: ARC: export __udivdi3 for modules Some module using div_u64() was failing to link because the libgcc 64-bit divide assist routine was not being exported for modules Reported-by: avinashp@quantenna.com Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta diff --git a/arch/arc/kernel/arcksyms.c b/arch/arc/kernel/arcksyms.c index 4d9e777..000dd04 100644 --- a/arch/arc/kernel/arcksyms.c +++ b/arch/arc/kernel/arcksyms.c @@ -28,6 +28,7 @@ extern void __muldf3(void); extern void __divdf3(void); extern void __floatunsidf(void); extern void __floatunsisf(void); +extern void __udivdi3(void); EXPORT_SYMBOL(__ashldi3); EXPORT_SYMBOL(__ashrdi3); @@ -45,6 +46,7 @@ EXPORT_SYMBOL(__muldf3); EXPORT_SYMBOL(__divdf3); EXPORT_SYMBOL(__floatunsidf); EXPORT_SYMBOL(__floatunsisf); +EXPORT_SYMBOL(__udivdi3); /* ARC optimised assembler routines */ EXPORT_SYMBOL(memset); -- cgit v0.10.2 From 13f9bba7cd4f1665e4091143950a54e3f2984b07 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 18 Aug 2016 21:09:02 +0300 Subject: net/mlx5e: Set port MTU on netdev creation rather on open Port mtu shouldn't be written to hardware on every single interface open. Here we set it only when needed, on change_mtu and netdevice creation. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 870bea3..0fc3a2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1826,10 +1826,6 @@ int mlx5e_open_locked(struct net_device *netdev) netif_set_real_num_tx_queues(netdev, num_txqs); netif_set_real_num_rx_queues(netdev, priv->params.num_channels); - err = mlx5e_set_dev_port_mtu(netdev); - if (err) - goto err_clear_state_opened_flag; - err = mlx5e_open_channels(priv); if (err) { netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n", @@ -2593,6 +2589,7 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) mlx5e_close_locked(netdev); netdev->mtu = new_mtu; + mlx5e_set_dev_port_mtu(netdev); if (was_opened) err = mlx5e_open_locked(netdev); @@ -3463,6 +3460,8 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, mlx5e_init_l2_addr(priv); + mlx5e_set_dev_port_mtu(netdev); + err = register_netdev(netdev); if (err) { mlx5_core_err(mdev, "register_netdev failed, %d\n", err); -- cgit v0.10.2 From 506753b0b40997756d73f841c884a018f52401a0 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 18 Aug 2016 21:09:03 +0300 Subject: net/mlx5e: Optimization for MTU change Avoid unnecessary interface down/up operations upon an MTU change when it does not affect the rings configuration. Fixes: 461017cb006a ("net/mlx5e: Support RX multi-packet WQE (Striding RQ)") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0fc3a2b..65258b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2569,6 +2569,7 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) u16 max_mtu; u16 min_mtu; int err = 0; + bool reset; mlx5_query_port_max_mtu(mdev, &max_mtu, 1); @@ -2584,14 +2585,18 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) mutex_lock(&priv->state_lock); + reset = !priv->params.lro_en && + (priv->params.rq_wq_type != + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) + if (was_opened && reset) mlx5e_close_locked(netdev); netdev->mtu = new_mtu; mlx5e_set_dev_port_mtu(netdev); - if (was_opened) + if (was_opened && reset) err = mlx5e_open_locked(netdev); mutex_unlock(&priv->state_lock); -- cgit v0.10.2 From 1061c90f524963a0a90e7d2f9a6bfa666458af51 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 18 Aug 2016 21:09:04 +0300 Subject: net/mlx5: Fix pci error recovery flow When PCI error is detected we should save the state of the pci prior to disabling it. Also when receiving pci slot reset call we need to verify that the device is responsive. Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 4f491d4..2385bae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1420,36 +1420,12 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, dev_info(&pdev->dev, "%s was called\n", __func__); mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv); + pci_save_state(pdev); mlx5_pci_disable_device(dev); return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; } -static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) -{ - struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - int err = 0; - - dev_info(&pdev->dev, "%s was called\n", __func__); - - err = mlx5_pci_enable_device(dev); - if (err) { - dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n" - , __func__, err); - return PCI_ERS_RESULT_DISCONNECT; - } - pci_set_master(pdev); - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - - return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; -} - -void mlx5_disable_device(struct mlx5_core_dev *dev) -{ - mlx5_pci_err_detected(dev->pdev, 0); -} - /* wait for the device to show vital signs by waiting * for the health counter to start counting. */ @@ -1477,21 +1453,44 @@ static int wait_vital(struct pci_dev *pdev) return -ETIMEDOUT; } -static void mlx5_pci_resume(struct pci_dev *pdev) +static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_priv *priv = &dev->priv; int err; dev_info(&pdev->dev, "%s was called\n", __func__); - pci_save_state(pdev); - err = wait_vital(pdev); + err = mlx5_pci_enable_device(dev); if (err) { + dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n" + , __func__, err); + return PCI_ERS_RESULT_DISCONNECT; + } + + pci_set_master(pdev); + pci_restore_state(pdev); + + if (wait_vital(pdev)) { dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__); - return; + return PCI_ERS_RESULT_DISCONNECT; } + return PCI_ERS_RESULT_RECOVERED; +} + +void mlx5_disable_device(struct mlx5_core_dev *dev) +{ + mlx5_pci_err_detected(dev->pdev, 0); +} + +static void mlx5_pci_resume(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_priv *priv = &dev->priv; + int err; + + dev_info(&pdev->dev, "%s was called\n", __func__); + err = mlx5_load_one(dev, priv); if (err) dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n" -- cgit v0.10.2 From 2c0f8ce1b584a4d7b8ff53140d21dfed99834940 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 18 Aug 2016 21:09:05 +0300 Subject: net/mlx5: Added missing check of msg length in verifying its signature Set and verify signature calculates the signature for each of the mailbox nodes, even for those that are unused (from cache). Added a missing length check to set and verify only those which are used. While here, also moved the setting of msg's nodes token to where we already go over them. This saves a pass because checksum is disabled, and the only useful thing remaining that set signature does is setting the token. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index d6e2a1c..c2ec01a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -143,13 +143,14 @@ static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) return cmd->cmd_buf + (idx << cmd->log_stride); } -static u8 xor8_buf(void *buf, int len) +static u8 xor8_buf(void *buf, size_t offset, int len) { u8 *ptr = buf; u8 sum = 0; int i; + int end = len + offset; - for (i = 0; i < len; i++) + for (i = offset; i < end; i++) sum ^= ptr[i]; return sum; @@ -157,41 +158,49 @@ static u8 xor8_buf(void *buf, int len) static int verify_block_sig(struct mlx5_cmd_prot_block *block) { - if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff) + size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); + int xor_len = sizeof(*block) - sizeof(block->data) - 1; + + if (xor8_buf(block, rsvd0_off, xor_len) != 0xff) return -EINVAL; - if (xor8_buf(block, sizeof(*block)) != 0xff) + if (xor8_buf(block, 0, sizeof(*block)) != 0xff) return -EINVAL; return 0; } -static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token, - int csum) +static void calc_block_sig(struct mlx5_cmd_prot_block *block) { - block->token = token; - if (csum) { - block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - - sizeof(block->data) - 2); - block->sig = ~xor8_buf(block, sizeof(*block) - 1); - } + int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2; + size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); + + block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len); + block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1); } -static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum) +static void calc_chain_sig(struct mlx5_cmd_msg *msg) { struct mlx5_cmd_mailbox *next = msg->next; - - while (next) { - calc_block_sig(next->buf, token, csum); + int size = msg->len; + int blen = size - min_t(int, sizeof(msg->first.data), size); + int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) + / MLX5_CMD_DATA_BLOCK_SIZE; + int i = 0; + + for (i = 0; i < n && next; i++) { + calc_block_sig(next->buf); next = next->next; } } static void set_signature(struct mlx5_cmd_work_ent *ent, int csum) { - ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay)); - calc_chain_sig(ent->in, ent->token, csum); - calc_chain_sig(ent->out, ent->token, csum); + ent->lay->sig = ~xor8_buf(ent->lay, 0, sizeof(*ent->lay)); + if (csum) { + calc_chain_sig(ent->in); + calc_chain_sig(ent->out); + } } static void poll_timeout(struct mlx5_cmd_work_ent *ent) @@ -222,12 +231,17 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent) struct mlx5_cmd_mailbox *next = ent->out->next; int err; u8 sig; + int size = ent->out->len; + int blen = size - min_t(int, sizeof(ent->out->first.data), size); + int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) + / MLX5_CMD_DATA_BLOCK_SIZE; + int i = 0; - sig = xor8_buf(ent->lay, sizeof(*ent->lay)); + sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay)); if (sig != 0xff) return -EINVAL; - while (next) { + for (i = 0; i < n && next; i++) { err = verify_block_sig(next->buf); if (err) return err; @@ -656,7 +670,6 @@ static void cmd_work_handler(struct work_struct *work) spin_unlock_irqrestore(&cmd->alloc_lock, flags); } - ent->token = alloc_token(cmd); cmd->ent_arr[ent->idx] = ent; lay = get_inst(cmd, ent->idx); ent->lay = lay; @@ -766,7 +779,8 @@ static u8 *get_status_ptr(struct mlx5_outbox_hdr *out) static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, struct mlx5_cmd_msg *out, void *uout, int uout_size, mlx5_cmd_cbk_t callback, - void *context, int page_queue, u8 *status) + void *context, int page_queue, u8 *status, + u8 token) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -783,6 +797,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, if (IS_ERR(ent)) return PTR_ERR(ent); + ent->token = token; + if (!callback) init_completion(&ent->done); @@ -854,7 +870,8 @@ static const struct file_operations fops = { .write = dbg_write, }; -static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size) +static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size, + u8 token) { struct mlx5_cmd_prot_block *block; struct mlx5_cmd_mailbox *next; @@ -880,6 +897,7 @@ static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size) memcpy(block->data, from, copy); from += copy; size -= copy; + block->token = token; next = next->next; } @@ -949,7 +967,8 @@ static void free_cmd_box(struct mlx5_core_dev *dev, } static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev, - gfp_t flags, int size) + gfp_t flags, int size, + u8 token) { struct mlx5_cmd_mailbox *tmp, *head = NULL; struct mlx5_cmd_prot_block *block; @@ -978,6 +997,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev, tmp->next = head; block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0); block->block_num = cpu_to_be32(n - i - 1); + block->token = token; head = tmp; } msg->next = head; @@ -1352,7 +1372,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, } if (IS_ERR(msg)) - msg = mlx5_alloc_cmd_msg(dev, gfp, in_size); + msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0); return msg; } @@ -1377,6 +1397,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int err; u8 status = 0; u32 drv_synd; + u8 token; if (pci_channel_offline(dev->pdev) || dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { @@ -1395,20 +1416,22 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, return err; } - err = mlx5_copy_to_msg(inb, in, in_size); + token = alloc_token(&dev->cmd); + + err = mlx5_copy_to_msg(inb, in, in_size, token); if (err) { mlx5_core_warn(dev, "err %d\n", err); goto out_in; } - outb = mlx5_alloc_cmd_msg(dev, gfp, out_size); + outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token); if (IS_ERR(outb)) { err = PTR_ERR(outb); goto out_in; } err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, - pages_queue, &status); + pages_queue, &status, token); if (err) goto out_out; @@ -1476,7 +1499,7 @@ static int create_msg_cache(struct mlx5_core_dev *dev) INIT_LIST_HEAD(&cmd->cache.med.head); for (i = 0; i < NUM_LONG_LISTS; i++) { - msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE); + msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE, 0); if (IS_ERR(msg)) { err = PTR_ERR(msg); goto ex_err; @@ -1486,7 +1509,7 @@ static int create_msg_cache(struct mlx5_core_dev *dev) } for (i = 0; i < NUM_MED_LISTS; i++) { - msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE); + msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE, 0); if (IS_ERR(msg)) { err = PTR_ERR(msg); goto ex_err; -- cgit v0.10.2 From 6c3b4f90861c7ed59d0287b0ff7f2623f9d93d73 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 18 Aug 2016 21:09:06 +0300 Subject: net/mlx5: Update last-use statistics for flow rules Set lastuse statistic, when number of packets is changed compared to last query. This was wrongly dropped when bulk counter reading was added. Fixes: a351a1b03bf1 ('net/mlx5: Introduce bulk reading of flow counters') Signed-off-by: Amir Vadai Reported-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index c2877e9..3a9195b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -126,12 +126,21 @@ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev, for (node = &first->node; node; node = rb_next(node)) { struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node); struct mlx5_fc_cache *c = &counter->cache; + u64 packets; + u64 bytes; if (counter->id > last_id) break; mlx5_cmd_fc_bulk_get(dev, b, - counter->id, &c->packets, &c->bytes); + counter->id, &packets, &bytes); + + if (c->packets == packets) + continue; + + c->packets = packets; + c->bytes = bytes; + c->lastuse = jiffies; } out: -- cgit v0.10.2 From 1dbd0d373ac338903d27fab5204b13122cc5accd Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 18 Aug 2016 21:09:07 +0300 Subject: net/mlx5e: Use correct flow dissector key on flower offloading The wrong key is used when extracting the address type field set by the flower offload code. We have to use the control key and not the basic key, fix that. Fixes: e3a2b7ed018e ('net/mlx5e: Support offload cls_flower with drop action') Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index dc8b1cb..22cfc4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -170,7 +170,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_dissector_key_control *key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, + FLOW_DISSECTOR_KEY_CONTROL, f->key); addr_type = key->addr_type; } -- cgit v0.10.2 From dbe413e3bb93e0634f6d8d00b01cda6f141e0acd Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 18 Aug 2016 21:09:08 +0300 Subject: net/mlx5e: Retrieve the switchdev id from the firmware only once Avoid firmware command execution each time the switchdev HW ID attr get call is made. We do that by reading the ID (PF NIC MAC) only once at load time and store it on the representor structure. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 65258b2..03d944c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3387,6 +3387,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->set_rx_mode_work); if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); rep.load = mlx5e_nic_rep_load; rep.unload = mlx5e_nic_rep_unload; rep.vport = 0; @@ -3505,16 +3506,20 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) struct mlx5_eswitch *esw = mdev->priv.eswitch; int total_vfs = MLX5_TOTAL_VPORTS(mdev); int vport; + u8 mac[ETH_ALEN]; if (!MLX5_CAP_GEN(mdev, vport_group_manager)) return; + mlx5_query_nic_vport_mac_address(mdev, 0, mac); + for (vport = 1; vport < total_vfs; vport++) { struct mlx5_eswitch_rep rep; rep.load = mlx5e_vport_rep_load; rep.unload = mlx5e_vport_rep_unload; rep.vport = vport; + ether_addr_copy(rep.hw_id, mac); mlx5_eswitch_register_vport_rep(esw, &rep); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 1c7d8b8..134de4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -135,17 +135,16 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = { int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) { struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_eswitch_rep *rep = priv->ppriv; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - u8 mac[ETH_ALEN]; if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - mlx5_query_nic_vport_mac_address(priv->mdev, 0, mac); attr->u.ppid.id_len = ETH_ALEN; - memcpy(&attr->u.ppid.id, &mac, ETH_ALEN); + ether_addr_copy(attr->u.ppid.id, rep->hw_id); break; default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index c0b0560..a961409 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -174,6 +174,7 @@ struct mlx5_eswitch_rep { void *priv_data; struct list_head vport_sqs_list; bool valid; + u8 hw_id[ETH_ALEN]; }; struct mlx5_esw_offload { -- cgit v0.10.2 From ef78618b9d29d40c95ca7092493545b8487e226c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 18 Aug 2016 21:09:09 +0300 Subject: net/mlx5: E-Switch, Return the correct devlink e-switch mode Since mlx5 has also the NONE e-switch mode, we must translate from mlx5 mode to devlink mode on the devlink eswitch mode get call, do that. While here, remove the mlx5_ prefix from the static function helpers that deal with the mode to comply with the rest of the code. Fixes: c930a3ad7453 ('net/mlx5e: Add devlink based SRIOV mode change') Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a357e8e..1a3ccbf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -535,7 +535,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) esw_destroy_offloads_fdb_table(esw); } -static int mlx5_esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) +static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) { switch (mode) { case DEVLINK_ESWITCH_MODE_LEGACY: @@ -551,6 +551,22 @@ static int mlx5_esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) return 0; } +static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode) +{ + switch (mlx5_mode) { + case SRIOV_LEGACY: + *mode = DEVLINK_ESWITCH_MODE_LEGACY; + break; + case SRIOV_OFFLOADS: + *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; + break; + default: + return -EINVAL; + } + + return 0; +} + int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) { struct mlx5_core_dev *dev; @@ -566,7 +582,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) if (cur_mlx5_mode == SRIOV_NONE) return -EOPNOTSUPP; - if (mlx5_esw_mode_from_devlink(mode, &mlx5_mode)) + if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; if (cur_mlx5_mode == mlx5_mode) @@ -592,9 +608,7 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) if (dev->priv.eswitch->mode == SRIOV_NONE) return -EOPNOTSUPP; - *mode = dev->priv.eswitch->mode; - - return 0; + return esw_mode_to_devlink(dev->priv.eswitch->mode, mode); } void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, -- cgit v0.10.2 From 1a8ee6f25b10da7e3de0899a184b221d793f2482 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 18 Aug 2016 21:09:10 +0300 Subject: net/mlx5: E-Switch, Set the send-to-vport rules in the correct table While adding actual offloading support to the new switchdev mode, we didn't change the setup of the send-to-vport rules to put them in the slow path table, fix that. Fixes: 1033665e63b6 ('net/mlx5: E-Switch, Use two priorities for SRIOV offloads mode') Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1a3ccbf..3dc83a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -113,7 +113,7 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = vport; - flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, spec, + flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, &dest); if (IS_ERR(flow_rule)) -- cgit v0.10.2 From f96750f8d6bd64bb6f5a7a1146121b96bc6115f0 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 18 Aug 2016 21:09:11 +0300 Subject: net/mlx5: E-Switch, Avoid ACLs in the offloads mode When we are in the switchdev/offloads mode, HW matching is done as dictated by the offloaded rules and hence we don't need to enable the ACLs mechanism used by the legacy mode. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index f6d6677..8b78f15 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1451,7 +1451,8 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); - if (vport_num) { /* Only VFs need ACLs for VST and spoofchk filtering */ + /* Only VFs need ACLs for VST and spoofchk filtering */ + if (vport_num && esw->mode == SRIOV_LEGACY) { esw_vport_ingress_config(esw, vport); esw_vport_egress_config(esw, vport); } @@ -1502,7 +1503,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) */ esw_vport_change_handle_locked(vport); vport->enabled_events = 0; - if (vport_num) { + if (vport_num && esw->mode == SRIOV_LEGACY) { esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } @@ -1767,7 +1768,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, vport, err); mutex_lock(&esw->state_lock); - if (evport->enabled) + if (evport->enabled && esw->mode == SRIOV_LEGACY) err = esw_vport_ingress_config(esw, evport); mutex_unlock(&esw->state_lock); return err; @@ -1839,7 +1840,7 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, mutex_lock(&esw->state_lock); evport->vlan = vlan; evport->qos = qos; - if (evport->enabled) { + if (evport->enabled && esw->mode == SRIOV_LEGACY) { err = esw_vport_ingress_config(esw, evport); if (err) goto out; @@ -1868,10 +1869,11 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, mutex_lock(&esw->state_lock); pschk = evport->spoofchk; evport->spoofchk = spoofchk; - if (evport->enabled) + if (evport->enabled && esw->mode == SRIOV_LEGACY) { err = esw_vport_ingress_config(esw, evport); - if (err) - evport->spoofchk = pschk; + if (err) + evport->spoofchk = pschk; + } mutex_unlock(&esw->state_lock); return err; -- cgit v0.10.2 From 4c2f2454964477c66ef57745daab203b71783f66 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 18 Aug 2016 14:58:35 -0300 Subject: sctp: linearize early if it's not GSO Because otherwise when crc computation is still needed it's way more expensive than on a linear buffer to the point that it affects performance. It's so expensive that netperf test gives a perf output as below: Overhead Command Shared Object Symbol 18,62% netserver [kernel.vmlinux] [k] crc32_generic_shift 2,57% netserver [kernel.vmlinux] [k] __pskb_pull_tail 1,94% netserver [kernel.vmlinux] [k] fib_table_lookup 1,90% netserver [kernel.vmlinux] [k] copy_user_enhanced_fast_string 1,66% swapper [kernel.vmlinux] [k] intel_idle 1,63% netserver [kernel.vmlinux] [k] _raw_spin_lock 1,59% netserver [sctp] [k] sctp_packet_transmit 1,55% netserver [kernel.vmlinux] [k] memcpy_erms 1,42% netserver [sctp] [k] sctp_rcv # netperf -H 192.168.10.1 -l 10 -t SCTP_STREAM -cC -- -m 12000 SCTP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.10.1 () port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 212992 212992 12000 10.00 3016.42 2.88 3.78 1.874 2.462 After patch: Overhead Command Shared Object Symbol 2,75% netserver [kernel.vmlinux] [k] memcpy_erms 2,63% netserver [kernel.vmlinux] [k] copy_user_enhanced_fast_string 2,39% netserver [kernel.vmlinux] [k] fib_table_lookup 2,04% netserver [kernel.vmlinux] [k] __pskb_pull_tail 1,91% netserver [kernel.vmlinux] [k] _raw_spin_lock 1,91% netserver [sctp] [k] sctp_packet_transmit 1,72% netserver [mlx4_en] [k] mlx4_en_process_rx_cq 1,68% netserver [sctp] [k] sctp_rcv # netperf -H 192.168.10.1 -l 10 -t SCTP_STREAM -cC -- -m 12000 SCTP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.10.1 () port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 212992 212992 12000 10.00 3681.77 3.83 3.46 2.045 1.849 Fixes: 3acb50c18d8d ("sctp: delay as much as possible skb_linearize") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/net/sctp/input.c b/net/sctp/input.c index c182db7..69444d3 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -119,7 +119,13 @@ int sctp_rcv(struct sk_buff *skb) skb_transport_offset(skb)) goto discard_it; - if (!pskb_may_pull(skb, sizeof(struct sctphdr))) + /* If the packet is fragmented and we need to do crc checking, + * it's better to just linearize it otherwise crc computing + * takes longer. + */ + if ((!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) && + skb_linearize(skb)) || + !pskb_may_pull(skb, sizeof(struct sctphdr))) goto discard_it; /* Pull up the IP header. */ @@ -1177,9 +1183,6 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) return NULL; - if (skb_linearize(skb)) - return NULL; - ch = (sctp_chunkhdr_t *) skb->data; /* The code below will attempt to walk the chunk and extract diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index c30ddb0..6437aa9 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -170,19 +170,6 @@ next_chunk: chunk = list_entry(entry, struct sctp_chunk, list); - /* Linearize if it's not GSO */ - if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) != SKB_GSO_SCTP && - skb_is_nonlinear(chunk->skb)) { - if (skb_linearize(chunk->skb)) { - __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS); - sctp_chunk_free(chunk); - goto next_chunk; - } - - /* Update sctp_hdr as it probably changed */ - chunk->sctp_hdr = sctp_hdr(chunk->skb); - } - if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) { /* GSO-marked skbs but without frags, handle * them normally -- cgit v0.10.2 From 56cff471d0c62b721a298f806e7637501debb513 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 19 Aug 2016 13:36:23 +0800 Subject: l2tp: Fix the connect status check in pppol2tp_getname The sk->sk_state is bits flag, so need use bit operation check instead of value check. Signed-off-by: Gao Feng Tested-by: Guillaume Nault Signed-off-by: David S. Miller diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index d9560aa..232cb92 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -856,7 +856,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, error = -ENOTCONN; if (sk == NULL) goto end; - if (sk->sk_state != PPPOX_CONNECTED) + if (!(sk->sk_state & PPPOX_CONNECTED)) goto end; error = -EBADF; -- cgit v0.10.2 From 8912862f067276b480c4fed9da74c9c5601130a6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 19 Aug 2016 14:43:48 +0200 Subject: mlxsw: spectrum_buffers: Fix pool value handling in mlxsw_sp_sb_tc_pool_bind_set Pool index has to be converted by get_pool helper to work correctly for egress pool. In mlxsw the egress pool index starts from 0. Fixes: 0f433fa0ecc ("mlxsw: spectrum_buffers: Implement shared buffer configuration") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 237418a..953b214 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -717,22 +717,18 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, u8 local_port = mlxsw_sp_port->local_port; u8 pg_buff = tc_index; enum mlxsw_reg_sbxx_dir dir = pool_type; - u8 pool = pool_index; + u8 pool = pool_get(pool_index); u32 max_buff; int err; + if (dir != dir_get(pool_index)) + return -EINVAL; + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir, threshold, &max_buff); if (err) return err; - if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS) { - if (pool < MLXSW_SP_SB_POOL_COUNT) - return -EINVAL; - pool -= MLXSW_SP_SB_POOL_COUNT; - } else if (pool >= MLXSW_SP_SB_POOL_COUNT) { - return -EINVAL; - } return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, dir, 0, max_buff, pool); } -- cgit v0.10.2 From d5fb46e0e3b7e49ee83ba92efc3ab4e1a545ecc1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 26 Jul 2016 13:47:10 -0700 Subject: drm/vc4: Use drm_free_large() on handles to match its allocation. If you managed to exceed the limit to switch to vmalloc, we'd use the wrong free. Signed-off-by: Eric Anholt Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 6155e8a..62df61f 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -572,7 +572,7 @@ vc4_cl_lookup_bos(struct drm_device *dev, spin_unlock(&file_priv->table_lock); fail: - kfree(handles); + drm_free_large(handles); return 0; } -- cgit v0.10.2 From ece7267dccf0e9e08cb6e8dc6b7ad2be9c4eb444 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 19 Jul 2016 11:32:44 -0700 Subject: drm/vc4: Use drm_malloc_ab to fix large rendering jobs. If you exceeded the size that kmalloc would return, you'd get a dmesg warning and a return from the job submit. We can handle much allocations with vmalloc, and drm_malloc_ab makes that decision. Fixes failure in piglit's scissor-many. Signed-off-by: Eric Anholt diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 62df61f..bfd1b52 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -534,8 +534,8 @@ vc4_cl_lookup_bos(struct drm_device *dev, return -EINVAL; } - exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *), - GFP_KERNEL); + exec->bo = drm_calloc_large(exec->bo_count, + sizeof(struct drm_gem_cma_object *)); if (!exec->bo) { DRM_ERROR("Failed to allocate validated BO pointers\n"); return -ENOMEM; @@ -608,7 +608,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) * read the contents back for validation, and I think the * bo->vaddr is uncached access. */ - temp = kmalloc(temp_size, GFP_KERNEL); + temp = drm_malloc_ab(temp_size, 1); if (!temp) { DRM_ERROR("Failed to allocate storage for copying " "in bin/render CLs.\n"); @@ -675,7 +675,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) ret = vc4_validate_shader_recs(dev, exec); fail: - kfree(temp); + drm_free_large(temp); return ret; } @@ -688,7 +688,7 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) if (exec->bo) { for (i = 0; i < exec->bo_count; i++) drm_gem_object_unreference_unlocked(&exec->bo[i]->base); - kfree(exec->bo); + drm_free_large(exec->bo); } while (!list_empty(&exec->unref_list)) { -- cgit v0.10.2 From 163195fc12cae0c8b5c0d74d3ba8d2c5f81773bc Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 26 Jul 2016 13:47:12 -0700 Subject: drm/vc4: Fix handling of a pm_runtime_get_sync() success case. If the device was already up, a 1 is returned instead of 0. We were erroring out, leading the 3D driver to sometimes fail at screen initialization (generally with ENOENT returned to it). Signed-off-by: Eric Anholt Fixes: af713795c59f ("drm/vc4: Add a getparam ioctl for getting the V3D identity regs.") diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 8b42d31..9ecef93 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -57,21 +57,21 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, switch (args->param) { case DRM_VC4_PARAM_V3D_IDENT0: ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); - if (ret) + if (ret < 0) return ret; args->value = V3D_READ(V3D_IDENT0); pm_runtime_put(&vc4->v3d->pdev->dev); break; case DRM_VC4_PARAM_V3D_IDENT1: ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); - if (ret) + if (ret < 0) return ret; args->value = V3D_READ(V3D_IDENT1); pm_runtime_put(&vc4->v3d->pdev->dev); break; case DRM_VC4_PARAM_V3D_IDENT2: ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); - if (ret) + if (ret < 0) return ret; args->value = V3D_READ(V3D_IDENT2); pm_runtime_put(&vc4->v3d->pdev->dev); -- cgit v0.10.2 From def96527707e1978a0c88e75d13b082f51460d5c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 26 Jul 2016 13:47:13 -0700 Subject: drm/vc4: Free hang state before destroying BO cache. The BO cache will complain if BOs are still allocated when we try to destroy it (since freeing those BOs would try to hit the cache). You could hit this if you were to unload the module after a GPU hang. Signed-off-by: Eric Anholt Fixes: 214613656b51 ("drm/vc4: Add an interface for capturing the GPU state after a hang.") diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index bfd1b52..fba2c83 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -942,8 +942,8 @@ vc4_gem_destroy(struct drm_device *dev) vc4->overflow_mem = NULL; } - vc4_bo_cache_destroy(dev); - if (vc4->hang_state) vc4_free_hang_state(dev, vc4->hang_state); + + vc4_bo_cache_destroy(dev); } -- cgit v0.10.2 From 9326e6f25574bbb8bd48206d245654780e3fd665 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 26 Jul 2016 13:47:14 -0700 Subject: drm/vc4: Fix overflow mem unreferencing when the binner runs dry. Overflow memory handling is tricky: While it's still referenced by the BPO registers, we want to keep it from being freed. When we are putting a new set of overflow memory in the registers, we need to assign the old one to the last rendering job using it. We were looking at "what's currently running in the binner", but since the bin/render submission split, we may end up with the binner completing and having no new job while the renderer is still processing. So, if we don't find a bin job at all, look at the highest-seqno (last) render job to attach our overflow to. Signed-off-by: Eric Anholt Fixes: ca26d28bbaa3 ("drm/vc4: improve throughput by pipelining binning and rendering jobs") Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 489e3de..428e249 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -321,6 +321,15 @@ vc4_first_render_job(struct vc4_dev *vc4) struct vc4_exec_info, head); } +static inline struct vc4_exec_info * +vc4_last_render_job(struct vc4_dev *vc4) +{ + if (list_empty(&vc4->render_job_list)) + return NULL; + return list_last_entry(&vc4->render_job_list, + struct vc4_exec_info, head); +} + /** * struct vc4_texture_sample_info - saves the offsets into the UBO for texture * setup parameters. diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index b0104a34..094bc6a 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -83,8 +83,10 @@ vc4_overflow_mem_work(struct work_struct *work) spin_lock_irqsave(&vc4->job_lock, irqflags); current_exec = vc4_first_bin_job(vc4); + if (!current_exec) + current_exec = vc4_last_render_job(vc4); if (current_exec) { - vc4->overflow_mem->seqno = vc4->finished_seqno + 1; + vc4->overflow_mem->seqno = current_exec->seqno; list_add_tail(&vc4->overflow_mem->unref_head, ¤t_exec->unref_list); vc4->overflow_mem = NULL; -- cgit v0.10.2 From 552416c146fadc67cd9b53ef7adf88d3381c43a6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 26 Jul 2016 13:47:15 -0700 Subject: drm/vc4: Fix oops when userspace hands in a bad BO. We'd end up NULL pointer dereferencing because we didn't take the error path out in the parent. Fixes igt vc4_lookup_fail test. Signed-off-by: Eric Anholt Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index fba2c83..b262c5c 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -573,7 +573,7 @@ vc4_cl_lookup_bos(struct drm_device *dev, fail: drm_free_large(handles); - return 0; + return ret; } static int -- cgit v0.10.2 From c10ac75aeed2d8486a73a316ac3a08f85d140894 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 19 Aug 2016 20:58:26 -0700 Subject: ixgbe: Do not clear RAR entry when clearing VMDq for SAN MAC The RAR entry for the SAN MAC address was being cleared when we were clearing the VMDq pool bits. In order to prevent this we need to add an extra check to protect the SAN MAC from being cleared. Fixes: 6e982aeae ("ixgbe: Clear stale pool mappings") Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index b4217f3..c47b605 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -2958,8 +2958,10 @@ s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) } /* was that the last pool using this rar? */ - if (mpsar_lo == 0 && mpsar_hi == 0 && rar != 0) + if (mpsar_lo == 0 && mpsar_hi == 0 && + rar != 0 && rar != hw->mac.san_mac_rar_index) hw->mac.ops.clear_rar(hw, rar); + return 0; } -- cgit v0.10.2 From ff2e7d5d51469e98196f7933c83b781e96517e7c Mon Sep 17 00:00:00 2001 From: Shrikrishna Khare Date: Fri, 19 Aug 2016 10:33:42 -0700 Subject: vmxnet3: fix tx data ring copy for variable size 'Commit 3c8b3efc061a ("vmxnet3: allow variable length transmit data ring buffer")' changed the size of the buffers in the tx data ring from a fixed size of 128 bytes to a variable size. However, while copying data to the data ring, vmxnet3_copy_hdr continues to carry the old code that assumes fixed buffer size of 128. This patch fixes it by adding correct offset based on the actual data ring buffer size. Signed-off-by: Guolin Yang Signed-off-by: Shrikrishna Khare Signed-off-by: David S. Miller diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index c68fe49..4244b9d 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -914,7 +914,9 @@ vmxnet3_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, { struct Vmxnet3_TxDataDesc *tdd; - tdd = tq->data_ring.base + tq->tx_ring.next2fill; + tdd = (struct Vmxnet3_TxDataDesc *)((u8 *)tq->data_ring.base + + tq->tx_ring.next2fill * + tq->txdata_desc_size); memcpy(tdd->data, skb->data, ctx->copy_size); netdev_dbg(adapter->netdev, diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 74fc030..7dc37a0 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,10 +69,10 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.4.9.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.4.a.0-k" /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01040900 +#define VMXNET3_DRIVER_VERSION_NUM 0x01040a00 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ -- cgit v0.10.2 From ae141830b118c3fb5b7eab6fa7c8ab7b7224b0a4 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 19 Aug 2016 22:39:02 +0200 Subject: parisc: Fix automatic selection of cr16 clocksource Commit 54b66800907 (parisc: Add native high-resolution sched_clock() implementation) added support to use the CPU-internal cr16 counters as reliable clocksource with the help of HAVE_UNSTABLE_SCHED_CLOCK. Sadly the commit missed to remove the hack which prevented cr16 to become the default clocksource even on SMP systems. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # 4.7+ diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 5adc339..0c2a94a 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -51,8 +51,6 @@ EXPORT_SYMBOL(_parisc_requires_coherency); DEFINE_PER_CPU(struct cpuinfo_parisc, cpu_data); -extern int update_cr16_clocksource(void); /* from time.c */ - /* ** PARISC CPU driver - claim "device" and initialize CPU data structures. ** @@ -228,12 +226,6 @@ static int processor_probe(struct parisc_device *dev) } #endif - /* If we've registered more than one cpu, - * we'll use the jiffies clocksource since cr16 - * is not synchronized between CPUs. - */ - update_cr16_clocksource(); - return 0; } diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 505cf1a..4b0b963 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -221,18 +221,6 @@ static struct clocksource clocksource_cr16 = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -int update_cr16_clocksource(void) -{ - /* since the cr16 cycle counters are not synchronized across CPUs, - we'll check if we should switch to a safe clocksource: */ - if (clocksource_cr16.rating != 0 && num_online_cpus() > 1) { - clocksource_change_rating(&clocksource_cr16, 0); - return 1; - } - - return 0; -} - void __init start_cpu_itimer(void) { unsigned int cpu = smp_processor_id(); -- cgit v0.10.2 From 3eb53b20d7bd1374598cfb1feaa081fcac0e76cd Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 20 Aug 2016 11:51:38 +0200 Subject: parisc: Fix order of EREFUSED define in errno.h When building gccgo in userspace, errno.h gets parsed and the go include file sysinfo.go is generated. Since EREFUSED is defined to the same value as ECONNREFUSED, and ECONNREFUSED is defined later on in errno.h, this leads to go complaining that EREFUSED isn't defined yet. Fix this trivial problem by moving the define of EREFUSED down after ECONNREFUSED in errno.h (and clean up the indenting while touching this line). Signed-off-by: Helge Deller Cc: stable@vger.kernel.org diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h index c0ae625..274d5bc 100644 --- a/arch/parisc/include/uapi/asm/errno.h +++ b/arch/parisc/include/uapi/asm/errno.h @@ -97,10 +97,10 @@ #define ENOTCONN 235 /* Transport endpoint is not connected */ #define ESHUTDOWN 236 /* Cannot send after transport endpoint shutdown */ #define ETOOMANYREFS 237 /* Too many references: cannot splice */ -#define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ #define ETIMEDOUT 238 /* Connection timed out */ #define ECONNREFUSED 239 /* Connection refused */ -#define EREMOTERELEASE 240 /* Remote peer released connection */ +#define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ +#define EREMOTERELEASE 240 /* Remote peer released connection */ #define EHOSTDOWN 241 /* Host is down */ #define EHOSTUNREACH 242 /* No route to host */ -- cgit v0.10.2 From 6695593e4a7659db49ac6eca98c164f7b5589f72 Mon Sep 17 00:00:00 2001 From: Aleksandr Makarov Date: Sat, 20 Aug 2016 13:29:41 +0300 Subject: USB: serial: option: add WeTelecom WM-D200 Add support for WeTelecom WM-D200. T: Bus=03 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=22de ProdID=6801 Rev=00.00 S: Manufacturer=WeTelecom Incorporated S: Product=WeTelecom Mobile Products C: #Ifs= 4 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#= 3 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage Signed-off-by: Aleksandr Makarov Cc: stable Signed-off-by: Johan Hovold diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index bc47258..bb6a711 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -525,6 +525,10 @@ static void option_instat_callback(struct urb *urb); #define VIATELECOM_VENDOR_ID 0x15eb #define VIATELECOM_PRODUCT_CDS7 0x0001 +/* WeTelecom products */ +#define WETELECOM_VENDOR_ID 0x22de +#define WETELECOM_PRODUCT_WMD200 0x6801 + struct option_blacklist_info { /* bitmask of interface numbers blacklisted for send_setup */ const unsigned long sendsetup; @@ -1991,6 +1995,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) }, { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) }, + { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); -- cgit v0.10.2 From 5575cf133cf7f564da991595c6bc9344afa7d89a Mon Sep 17 00:00:00 2001 From: Daniel Romell Date: Fri, 19 Aug 2016 14:12:01 +0200 Subject: net: xilinx: emaclite: Fallback to random MAC address. If the address configured in the device tree is invalid, the driver will fallback to using a random address from the locally administered range. Signed-off-by: Daniel Romell Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 3cee84a..93dc10b 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -1131,11 +1131,13 @@ static int xemaclite_of_probe(struct platform_device *ofdev) lp->rx_ping_pong = get_bool(ofdev, "xlnx,rx-ping-pong"); mac_address = of_get_mac_address(ofdev->dev.of_node); - if (mac_address) + if (mac_address) { /* Set the MAC address. */ memcpy(ndev->dev_addr, mac_address, ETH_ALEN); - else - dev_warn(dev, "No MAC address found\n"); + } else { + dev_warn(dev, "No MAC address found, using random\n"); + eth_hw_addr_random(ndev); + } /* Clear the Tx CSR's in case this is a restart */ __raw_writel(0, lp->base_addr + XEL_TSR_OFFSET); -- cgit v0.10.2 From 522caebb2c3684f4a1d154526fb5e33f1381e92a Mon Sep 17 00:00:00 2001 From: Giorgio Dal Molin Date: Tue, 16 Aug 2016 20:43:37 +0200 Subject: iio:ti-ads1015: fix a wrong pointer definition. The call to i2c_get_clientdata(client) returns a struct iio_dev*, not the needed struct ads1015_data*. We need here an intermediate step as in the function: void ads1015_get_channels_config(struct i2c_client *client). Signed-off-by: Giorgio Dal Molin Fixes: ecc24e72f437 ("iio: adc: Add TI ADS1015 ADC driver support") Cc: Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c index 1ef39877..066abaf 100644 --- a/drivers/iio/adc/ti-ads1015.c +++ b/drivers/iio/adc/ti-ads1015.c @@ -489,7 +489,8 @@ static struct iio_info ads1115_info = { #ifdef CONFIG_OF static int ads1015_get_channels_config_of(struct i2c_client *client) { - struct ads1015_data *data = i2c_get_clientdata(client); + struct iio_dev *indio_dev = i2c_get_clientdata(client); + struct ads1015_data *data = iio_priv(indio_dev); struct device_node *node; if (!client->dev.of_node || -- cgit v0.10.2 From 80e162ee9b31d77d851b10f8c5299132be1e120f Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 29 Jun 2016 20:27:44 +0100 Subject: staging: comedi: daqboard2000: bug fix board type matching code `daqboard2000_find_boardinfo()` is supposed to check if the DaqBoard/2000 series model is supported, based on the PCI subvendor and subdevice ID. The current code is wrong as it is comparing the PCI device's subdevice ID to an expected, fixed value for the subvendor ID. It should be comparing the PCI device's subvendor ID to this fixed value. Correct it. Fixes: 7e8401b23e7f ("staging: comedi: daqboard2000: add back subsystem_device check") Signed-off-by: Ian Abbott Cc: # 3.7+ Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/comedi/drivers/daqboard2000.c b/drivers/staging/comedi/drivers/daqboard2000.c index 65daef0..0f4eb95 100644 --- a/drivers/staging/comedi/drivers/daqboard2000.c +++ b/drivers/staging/comedi/drivers/daqboard2000.c @@ -634,7 +634,7 @@ static const void *daqboard2000_find_boardinfo(struct comedi_device *dev, const struct daq200_boardtype *board; int i; - if (pcidev->subsystem_device != PCI_VENDOR_ID_IOTECH) + if (pcidev->subsystem_vendor != PCI_VENDOR_ID_IOTECH) return NULL; for (i = 0; i < ARRAY_SIZE(boardtypes); i++) { -- cgit v0.10.2 From 403fe7f34e3327ddac2e06a15e76a293d613381e Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Thu, 30 Jun 2016 19:58:32 +0100 Subject: staging: comedi: comedi_test: fix timer race conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 73e0e4dfed4c ("staging: comedi: comedi_test: fix timer lock-up") fixed a lock-up in the timer routine `waveform_ai_timer()` (which was called `waveform_ai_interrupt()` at the time) caused by commit 240512474424 ("staging: comedi: comedi_test: use comedi_handle_events()"). However, it introduced a race condition that can result in the timer routine misbehaving, such as accessing freed memory or dereferencing a NULL pointer. 73e0... changed the timer routine to do nothing unless a `WAVEFORM_AI_RUNNING` flag was set, and changed `waveform_ai_cancel()` to clear the flag and replace a call to `del_timer_sync()` with a call to `del_timer()`. `waveform_ai_cancel()` may be called from the timer routine itself (via `comedi_handle_events()`), or from `do_cancel()`. (`do_cancel()` is called as a result of a file operation (usually a `COMEDI_CANCEL` ioctl command, or a release), or during device removal.) When called from `do_cancel()`, the call to `waveform_ai_cancel()` is followed by a call to `do_become_nonbusy()`, which frees up stuff for the current asynchronous command under the assumption that it is now safe to do so. The race condition occurs when the timer routine `waveform_ai_timer()` checks the `WAVEFORM_AI_RUNNING` flag just before it is cleared by `waveform_ai_cancel()`, and is still running during the call to `do_become_nonbusy()`. In particular, it can lead to a NULL pointer dereference: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] waveform_ai_timer+0x17d/0x290 [comedi_test] That corresponds to this line in `waveform_ai_timer()`: unsigned int chanspec = cmd->chanlist[async->cur_chan]; but `do_become_nonbusy()` frees `cmd->chanlist` and sets it to `NULL`. Fix the race by calling `del_timer_sync()` instead of `del_timer()` in `waveform_ai_cancel()` when not in an interrupt context. The only time `waveform_ai_cancel()` is called in an interrupt context is when it is called from the timer routine itself, via `comedi_handle_events()`. There is no longer any need for the `WAVEFORM_AI_RUNNING` flag, so get rid of it. The bug was copied from the AI subdevice to the AO when support for commands on the AO subdevice was added by commit 0cf55bbef2f9 ("staging: comedi: comedi_test: implement commands on AO subdevice"). That involves the timer routine `waveform_ao_timer()`, the comedi "cancel" routine `waveform_ao_cancel()`, and the flag `WAVEFORM_AO_RUNNING`. Fix it in the same way as for the AI subdevice. Fixes: 73e0e4dfed4c ("staging: comedi: comedi_test: fix timer lock-up") Fixes: 0cf55bbef2f9 ("staging: comedi: comedi_test: implement commands on AO subdevice") Reported-by: Éric Piel Signed-off-by: Ian Abbott Cc: # 4.4+ Cc: Éric Piel Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/comedi/drivers/comedi_test.c b/drivers/staging/comedi/drivers/comedi_test.c index 4ab1866..ec5b9a2 100644 --- a/drivers/staging/comedi/drivers/comedi_test.c +++ b/drivers/staging/comedi/drivers/comedi_test.c @@ -56,11 +56,6 @@ #define N_CHANS 8 -enum waveform_state_bits { - WAVEFORM_AI_RUNNING, - WAVEFORM_AO_RUNNING -}; - /* Data unique to this driver */ struct waveform_private { struct timer_list ai_timer; /* timer for AI commands */ @@ -68,7 +63,6 @@ struct waveform_private { unsigned int wf_amplitude; /* waveform amplitude in microvolts */ unsigned int wf_period; /* waveform period in microseconds */ unsigned int wf_current; /* current time in waveform period */ - unsigned long state_bits; unsigned int ai_scan_period; /* AI scan period in usec */ unsigned int ai_convert_period; /* AI conversion period in usec */ struct timer_list ao_timer; /* timer for AO commands */ @@ -191,10 +185,6 @@ static void waveform_ai_timer(unsigned long arg) unsigned int nsamples; unsigned int time_increment; - /* check command is still active */ - if (!test_bit(WAVEFORM_AI_RUNNING, &devpriv->state_bits)) - return; - now = ktime_to_us(ktime_get()); nsamples = comedi_nsamples_left(s, UINT_MAX); @@ -386,11 +376,6 @@ static int waveform_ai_cmd(struct comedi_device *dev, */ devpriv->ai_timer.expires = jiffies + usecs_to_jiffies(devpriv->ai_convert_period) + 1; - - /* mark command as active */ - smp_mb__before_atomic(); - set_bit(WAVEFORM_AI_RUNNING, &devpriv->state_bits); - smp_mb__after_atomic(); add_timer(&devpriv->ai_timer); return 0; } @@ -400,11 +385,12 @@ static int waveform_ai_cancel(struct comedi_device *dev, { struct waveform_private *devpriv = dev->private; - /* mark command as no longer active */ - clear_bit(WAVEFORM_AI_RUNNING, &devpriv->state_bits); - smp_mb__after_atomic(); - /* cannot call del_timer_sync() as may be called from timer routine */ - del_timer(&devpriv->ai_timer); + if (in_softirq()) { + /* Assume we were called from the timer routine itself. */ + del_timer(&devpriv->ai_timer); + } else { + del_timer_sync(&devpriv->ai_timer); + } return 0; } @@ -436,10 +422,6 @@ static void waveform_ao_timer(unsigned long arg) u64 scans_since; unsigned int scans_avail = 0; - /* check command is still active */ - if (!test_bit(WAVEFORM_AO_RUNNING, &devpriv->state_bits)) - return; - /* determine number of scan periods since last time */ now = ktime_to_us(ktime_get()); scans_since = now - devpriv->ao_last_scan_time; @@ -518,11 +500,6 @@ static int waveform_ao_inttrig_start(struct comedi_device *dev, devpriv->ao_last_scan_time = ktime_to_us(ktime_get()); devpriv->ao_timer.expires = jiffies + usecs_to_jiffies(devpriv->ao_scan_period); - - /* mark command as active */ - smp_mb__before_atomic(); - set_bit(WAVEFORM_AO_RUNNING, &devpriv->state_bits); - smp_mb__after_atomic(); add_timer(&devpriv->ao_timer); return 1; @@ -608,11 +585,12 @@ static int waveform_ao_cancel(struct comedi_device *dev, struct waveform_private *devpriv = dev->private; s->async->inttrig = NULL; - /* mark command as no longer active */ - clear_bit(WAVEFORM_AO_RUNNING, &devpriv->state_bits); - smp_mb__after_atomic(); - /* cannot call del_timer_sync() as may be called from timer routine */ - del_timer(&devpriv->ao_timer); + if (in_softirq()) { + /* Assume we were called from the timer routine itself. */ + del_timer(&devpriv->ao_timer); + } else { + del_timer_sync(&devpriv->ao_timer); + } return 0; } -- cgit v0.10.2 From 5ca05345c56cb979e1a25ab6146437002f95cac8 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 20 Jul 2016 17:07:34 +0100 Subject: staging: comedi: ni_mio_common: fix wrong insn_write handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For counter subdevices, the `s->insn_write` handler is being set to the wrong function, `ni_tio_insn_read()`. It should be `ni_tio_insn_write()`. Signed-off-by: Ian Abbott Reported-by: Éric Piel Fixes: 10f74377eec3 ("staging: comedi: ni_tio: make ni_tio_winsn() a proper comedi (*insn_write)" Cc: # 3.17+ Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index 8dabb19..3cf3c05 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -5480,7 +5480,7 @@ static int ni_E_init(struct comedi_device *dev, s->maxdata = (devpriv->is_m_series) ? 0xffffffff : 0x00ffffff; s->insn_read = ni_tio_insn_read; - s->insn_write = ni_tio_insn_read; + s->insn_write = ni_tio_insn_write; s->insn_config = ni_tio_insn_config; #ifdef PCIDMA if (dev->irq && devpriv->mite) { -- cgit v0.10.2 From c71f20ee76342376e3c4c67cdbe7421d8c4e886e Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Fri, 29 Jul 2016 09:43:56 -0400 Subject: staging: comedi: adv_pci1760: Do not return EINVAL for CMDF_ROUND_DOWN. The CMDF_ROUND_DOWN case falls through and so always returns -EINVAL. Fixes: 14b93bb6bbf0 ("staging: comedi: adv_pci_dio: separate out PCI-1760 support") Signed-off-by: Phil Turnbull Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/comedi/drivers/adv_pci1760.c b/drivers/staging/comedi/drivers/adv_pci1760.c index d7dd1e5..9f525ff 100644 --- a/drivers/staging/comedi/drivers/adv_pci1760.c +++ b/drivers/staging/comedi/drivers/adv_pci1760.c @@ -196,6 +196,7 @@ static int pci1760_pwm_ns_to_div(unsigned int flags, unsigned int ns) break; case CMDF_ROUND_DOWN: divisor = ns / PCI1760_PWM_TIMEBASE; + break; default: return -EINVAL; } -- cgit v0.10.2 From 5ac5c3bcf57419d0aa3f53b12b8c07599a13fdcc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 21 Jun 2016 14:46:21 +0300 Subject: staging: comedi: dt2811: fix a precedence bug Bitwise | has higher precedence than ?: so we need to add some parenthesis for this to work as intended. Fixes: 7c9574090d30 ('staging: comedi: dt2811: simplify A/D reference configuration') Signed-off-by: Dan Carpenter Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/comedi/drivers/dt2811.c b/drivers/staging/comedi/drivers/dt2811.c index 904f6377..8bbd938 100644 --- a/drivers/staging/comedi/drivers/dt2811.c +++ b/drivers/staging/comedi/drivers/dt2811.c @@ -588,8 +588,8 @@ static int dt2811_attach(struct comedi_device *dev, struct comedi_devconfig *it) s = &dev->subdevices[0]; s->type = COMEDI_SUBD_AI; s->subdev_flags = SDF_READABLE | - (it->options[2] == 1) ? SDF_DIFF : - (it->options[2] == 2) ? SDF_COMMON : SDF_GROUND; + ((it->options[2] == 1) ? SDF_DIFF : + (it->options[2] == 2) ? SDF_COMMON : SDF_GROUND); s->n_chan = (it->options[2] == 1) ? 8 : 16; s->maxdata = 0x0fff; s->range_table = board->is_pgh ? &dt2811_pgh_ai_ranges -- cgit v0.10.2 From f0f4b0cc3a8cffd983f5940d46cd0227f3f5710a Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 19 Jul 2016 12:17:39 +0100 Subject: staging: comedi: ni_mio_common: fix AO inttrig backwards compatibility Commit ebb657babfa9 ("staging: comedi: ni_mio_common: clarify the cmd->start_arg validation and use") introduced a backwards compatibility issue in the use of asynchronous commands on the AO subdevice when `start_src` is `TRIG_EXT`. Valid values for `start_src` are `TRIG_INT` (for internal, software trigger), and `TRIG_EXT` (for external trigger). When set to `TRIG_EXT`. In both cases, the driver relies on an internal, software trigger to set things up (allowing the user application to write sufficient samples to the data buffer before the trigger), so it acts as a software "pre-trigger" in the `TRIG_EXT` case. The software trigger is handled by `ni_ao_inttrig()`. Prior to the above change, when `start_src` was `TRIG_INT`, `start_arg` was required to be 0, and `ni_ao_inttrig()` checked that the software trigger number was also 0. After the above change, when `start_src` was `TRIG_INT`, any value was allowed for `start_arg`, and `ni_ao_inttrig()` checked that the software trigger number matched this `start_arg` value. The backwards compatibility issue is that the internal trigger number now has to match `start_arg` when `start_src` is `TRIG_EXT` when it previously had to be 0. Fix the backwards compatibility issue in `ni_ao_inttrig()` by always allowing software trigger number 0 when `start_src` is something other than `TRIG_INT`. Thanks to Spencer Olson for reporting the issue. Signed-off-by: Ian Abbott Reported-by: Spencer Olson Fixes: ebb657babfa9 ("staging: comedi: ni_mio_common: clarify the cmd->start_arg validation and use") Cc: stable Reviewed-by: H Hartley Sweeten Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index 3cf3c05..0f97d7b 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -2772,7 +2772,15 @@ static int ni_ao_inttrig(struct comedi_device *dev, int i; static const int timeout = 1000; - if (trig_num != cmd->start_arg) + /* + * Require trig_num == cmd->start_arg when cmd->start_src == TRIG_INT. + * For backwards compatibility, also allow trig_num == 0 when + * cmd->start_src != TRIG_INT (i.e. when cmd->start_src == TRIG_EXT); + * in that case, the internal trigger is being used as a pre-trigger + * before the external trigger. + */ + if (!(trig_num == cmd->start_arg || + (trig_num == 0 && cmd->start_src != TRIG_INT))) return -EINVAL; /* -- cgit v0.10.2 From fcf68f3c0bb2a541aa47a2a38b8939edf84fd529 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 8 Aug 2016 17:19:38 -0700 Subject: iio: fix sched WARNING "do not call blocking ops when !TASK_RUNNING" When using CONFIG_DEBUG_ATOMIC_SLEEP, the scheduler nicely points out that we're calling sleeping primitives within the wait_event loop, which means we might clobber the task state: [ 10.831289] do not call blocking ops when !TASK_RUNNING; state=1 set at [] [ 10.845531] ------------[ cut here ]------------ [ 10.850161] WARNING: at kernel/sched/core.c:7630 ... [ 12.164333] ---[ end trace 45409966a9a76438 ]--- [ 12.168942] Call trace: [ 12.171391] [] __might_sleep+0x64/0x90 [ 12.176699] [] mutex_lock_nested+0x50/0x3fc [ 12.182440] [] iio_kfifo_buf_data_available+0x28/0x4c [ 12.189043] [] iio_buffer_ready+0x60/0xe0 [ 12.194608] [] iio_buffer_read_first_n_outer+0x108/0x1a8 [ 12.201474] [] __vfs_read+0x58/0x114 [ 12.206606] [] vfs_read+0x94/0x118 [ 12.211564] [] SyS_read+0x64/0xb4 [ 12.216436] [] el0_svc_naked+0x24/0x28 To avoid this, we should (a la https://lwn.net/Articles/628628/) use the wait_woken() function, which avoids the nested sleeping while still handling races between waiting / wake-events. Signed-off-by: Brian Norris Reviewed-by: Lars-Peter Clausen Cc: # 3.19+ for introduction of wake_woken Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 90462fc..49bf9c5 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -107,6 +107,7 @@ ssize_t iio_buffer_read_first_n_outer(struct file *filp, char __user *buf, { struct iio_dev *indio_dev = filp->private_data; struct iio_buffer *rb = indio_dev->buffer; + DEFINE_WAIT_FUNC(wait, woken_wake_function); size_t datum_size; size_t to_wait; int ret; @@ -131,19 +132,29 @@ ssize_t iio_buffer_read_first_n_outer(struct file *filp, char __user *buf, else to_wait = min_t(size_t, n / datum_size, rb->watermark); + add_wait_queue(&rb->pollq, &wait); do { - ret = wait_event_interruptible(rb->pollq, - iio_buffer_ready(indio_dev, rb, to_wait, n / datum_size)); - if (ret) - return ret; + if (!indio_dev->info) { + ret = -ENODEV; + break; + } - if (!indio_dev->info) - return -ENODEV; + if (!iio_buffer_ready(indio_dev, rb, to_wait, n / datum_size)) { + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + wait_woken(&wait, TASK_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + continue; + } ret = rb->access->read_first_n(rb, n, buf); if (ret == 0 && (filp->f_flags & O_NONBLOCK)) ret = -EAGAIN; } while (ret == 0); + remove_wait_queue(&rb->pollq, &wait); return ret; } -- cgit v0.10.2 From ca64d4bc80a88845f7e1e266dbff798f928bcc06 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 25 Jul 2016 23:06:56 +0100 Subject: iio: chemical: atlas-ph-sensor: fix typo in val assignment Fix an incorrect assignment due to a typo on a variable name. The variable val2 should be assigned 100000 and not val. Signed-off-by: Colin Ian King Reviewed-By: Matt Ranostay Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/chemical/atlas-ph-sensor.c b/drivers/iio/chemical/atlas-ph-sensor.c index ae038a5..407f141 100644 --- a/drivers/iio/chemical/atlas-ph-sensor.c +++ b/drivers/iio/chemical/atlas-ph-sensor.c @@ -434,7 +434,7 @@ static int atlas_read_raw(struct iio_dev *indio_dev, break; case IIO_ELECTRICALCONDUCTIVITY: *val = 1; /* 0.00001 */ - *val = 100000; + *val2 = 100000; break; case IIO_CONCENTRATION: *val = 0; /* 0.000000001 */ -- cgit v0.10.2 From d9a8594011080def9202f2c258f755647fe66683 Mon Sep 17 00:00:00 2001 From: Aditya Shankar Date: Tue, 2 Aug 2016 11:49:00 +0200 Subject: MAINTAINERS: Update maintainer entry for wilc1000 Take the maintenance of the Atmel WIFI staging driver wilc1000. Former maintainers are no more with Atmel. Reported-by: Loic Lefort Signed-off-by: Aditya Shankar Signed-off-by: Ganesh Krishna Acked-by: Luis de Bethencourt Signed-off-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman diff --git a/MAINTAINERS b/MAINTAINERS index a306795..09c7066 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11217,12 +11217,8 @@ S: Odd Fixes F: drivers/staging/vt665?/ STAGING - WILC1000 WIFI DRIVER -M: Johnny Kim -M: Austin Shin -M: Chris Park -M: Tony Cho -M: Glen Lee -M: Leo Kim +M: Aditya Shankar +M: Ganesh Krishna L: linux-wireless@vger.kernel.org S: Supported F: drivers/staging/wilc1000/ -- cgit v0.10.2 From e9d766b965a15816cf9ec353d86c11f27b783d4d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 22 May 2016 11:05:38 +0200 Subject: MAINTAINERS: Add file patterns for ion device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Cc: Sumit Semwal Cc: devel@driverdev.osuosl.org Acked-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman diff --git a/MAINTAINERS b/MAINTAINERS index 09c7066..e614392 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -798,6 +798,7 @@ M: Laura Abbott M: Sumit Semwal L: devel@driverdev.osuosl.org S: Supported +F: Documentation/devicetree/bindings/staging/ion/ F: drivers/staging/android/ion F: drivers/staging/android/uapi/ion.h F: drivers/staging/android/uapi/ion_test.h -- cgit v0.10.2 From 23535c1322e42e71f32bfbeae9970f4dba31e3bd Mon Sep 17 00:00:00 2001 From: Binoy Jayan Date: Thu, 21 Jul 2016 13:26:56 +0530 Subject: staging: wilc1000: txq_event: Fix coding error Fix incorrect usage of completion interface by replacing 'wait_for_completion' with 'complete'. This error was introduced accidentally while replacing semaphores with mutexes. Reported-by: Jiri Slaby Signed-off-by: Binoy Jayan Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/wilc1000/linux_wlan.c b/drivers/staging/wilc1000/linux_wlan.c index 3a66255..3221511 100644 --- a/drivers/staging/wilc1000/linux_wlan.c +++ b/drivers/staging/wilc1000/linux_wlan.c @@ -648,7 +648,7 @@ void wilc1000_wlan_deinit(struct net_device *dev) mutex_unlock(&wl->hif_cs); } if (&wl->txq_event) - wait_for_completion(&wl->txq_event); + complete(&wl->txq_event); wlan_deinitialize_threads(dev); deinit_irq(dev); -- cgit v0.10.2 From 23436825e671cdd55c45d151ddc66fd3c47d10e9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 16 Jul 2016 13:07:55 +0300 Subject: staging: wilc1000: NULL dereference on error We can't pass NULL pointers to destroy_workqueue(). Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/wilc1000/host_interface.c b/drivers/staging/wilc1000/host_interface.c index 0b1760c..78f524f 100644 --- a/drivers/staging/wilc1000/host_interface.c +++ b/drivers/staging/wilc1000/host_interface.c @@ -3363,7 +3363,7 @@ int wilc_init(struct net_device *dev, struct host_if_drv **hif_drv_handler) if (!hif_workqueue) { netdev_err(vif->ndev, "Failed to create workqueue\n"); result = -ENOMEM; - goto _fail_mq_; + goto _fail_; } setup_timer(&periodic_rssi, GetPeriodicRSSI, @@ -3391,7 +3391,6 @@ int wilc_init(struct net_device *dev, struct host_if_drv **hif_drv_handler) clients_count++; -_fail_mq_: destroy_workqueue(hif_workqueue); _fail_: return result; -- cgit v0.10.2 From 6c08fda0306916135291103f23cc17248c422c49 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 15 Aug 2016 17:09:52 +0100 Subject: staging: wilc1000: correctly check if associatedsta has not been found The current check for associatedsta being set to -1 to indicate it has not been found is not working because associatedsta is initialized to zero and will never be -1. Fix this by initializing it to ~0 and checking for ~0 instead. Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c index 9092600..2c2e8ac 100644 --- a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c +++ b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c @@ -1191,7 +1191,7 @@ static int get_station(struct wiphy *wiphy, struct net_device *dev, struct wilc_priv *priv; struct wilc_vif *vif; u32 i = 0; - u32 associatedsta = 0; + u32 associatedsta = ~0; u32 inactive_time = 0; priv = wiphy_priv(wiphy); vif = netdev_priv(dev); @@ -1204,7 +1204,7 @@ static int get_station(struct wiphy *wiphy, struct net_device *dev, } } - if (associatedsta == -1) { + if (associatedsta == ~0) { netdev_err(dev, "sta required is not associated\n"); return -ENOENT; } -- cgit v0.10.2 From c0678b2d6648ab65b68703044709e367799ba9f9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 15 Aug 2016 15:52:23 -0700 Subject: include/linux: fix excess fence.h kernel-doc notation Fix excess fields in kernel-doc notation in after some struct fields were removed. Fixes these kernel-doc warnings: ..//include/linux/fence.h:85: warning: Excess struct/union/enum/typedef member 'child_list' description in 'fence' ..//include/linux/fence.h:85: warning: Excess struct/union/enum/typedef member 'active_list' description in 'fence' Fixes: 0431b9065f28 ("staging/android: bring struct sync_pt back") Cc: Daniel Vetter Cc: Sumit Semwal Cc: Luis de Bethencourt Signed-off-by: Randy Dunlap Reviewed-by: Gustavo Padovan Signed-off-by: Greg Kroah-Hartman diff --git a/include/linux/fence.h b/include/linux/fence.h index 8cc719a..2ac6fa5 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -49,8 +49,6 @@ struct fence_cb; * @timestamp: Timestamp when the fence was signaled. * @status: Optional, only valid if < 0, must be set before calling * fence_signal, indicates that the fence has completed with an error. - * @child_list: list of children fences - * @active_list: list of active fences * * the flags member must be manipulated and read using the appropriate * atomic ops (bit_*), so taking the spinlock will not be needed most -- cgit v0.10.2 From 4ec656bdf43a13a655a8259b79dd63bc1f0b1e41 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Sat, 20 Aug 2016 16:27:58 -0700 Subject: EDAC, skx_edac: Add EDAC driver for Skylake This is an entirely new driver instead of yet another set of patches to sb_edac.c because: 1) Mapping from PCI devices to socket/memory controller is significantly different. Skylake scatters devices on a socket across a number of PCI buses. 2) There is an extra level of interleaving via the "mcroute" register that would be a little messy to squeeze into the old driver. 3) Validation is getting too expensive. Changes to sb_edac need to be checked against Sandy Bridge, Ivy Bridge, Haswell, Broadwell and Knights Landing. Acked-by: Aristeu Rozanski Acked-by: Borislav Petkov Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index a306795..0bbe4b1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4525,6 +4525,12 @@ L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/sb_edac.c +EDAC-SKYLAKE +M: Tony Luck +L: linux-edac@vger.kernel.org +S: Maintained +F: drivers/edac/skx_edac.c + EDAC-XGENE APPLIED MICRO (APM) X-GENE SOC EDAC M: Loc Ho diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index d0c1dab..dff1a4a 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -251,6 +251,14 @@ config EDAC_SBRIDGE Support for error detection and correction the Intel Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers. +config EDAC_SKX + tristate "Intel Skylake server Integrated MC" + depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL + depends on PCI_MMCONFIG + help + Support for error detection and correction the Intel + Skylake server Integrated Memory Controllers. + config EDAC_MPC85XX tristate "Freescale MPC83xx / MPC85xx" depends on EDAC_MM_EDAC && FSL_SOC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index f9e4a3e..9860499 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_EDAC_I5400) += i5400_edac.o obj-$(CONFIG_EDAC_I7300) += i7300_edac.o obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o +obj-$(CONFIG_EDAC_SKX) += skx_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c new file mode 100644 index 0000000..0ff4878 --- /dev/null +++ b/drivers/edac/skx_edac.c @@ -0,0 +1,1121 @@ +/* + * EDAC driver for Intel(R) Xeon(R) Skylake processors + * Copyright (c) 2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "edac_core.h" + +#define SKX_REVISION " Ver: 1.0 " + +/* + * Debug macros + */ +#define skx_printk(level, fmt, arg...) \ + edac_printk(level, "skx", fmt, ##arg) + +#define skx_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg) + +/* + * Get a bit field at register value , from bit to bit + */ +#define GET_BITFIELD(v, lo, hi) \ + (((v) & GENMASK_ULL((hi), (lo))) >> (lo)) + +static LIST_HEAD(skx_edac_list); + +static u64 skx_tolm, skx_tohm; + +#define NUM_IMC 2 /* memory controllers per socket */ +#define NUM_CHANNELS 3 /* channels per memory controller */ +#define NUM_DIMMS 2 /* Max DIMMS per channel */ + +#define MASK26 0x3FFFFFF /* Mask for 2^26 */ +#define MASK29 0x1FFFFFFF /* Mask for 2^29 */ + +/* + * Each cpu socket contains some pci devices that provide global + * information, and also some that are local to each of the two + * memory controllers on the die. + */ +struct skx_dev { + struct list_head list; + u8 bus[4]; + struct pci_dev *sad_all; + struct pci_dev *util_all; + u32 mcroute; + struct skx_imc { + struct mem_ctl_info *mci; + u8 mc; /* system wide mc# */ + u8 lmc; /* socket relative mc# */ + u8 src_id, node_id; + struct skx_channel { + struct pci_dev *cdev; + struct skx_dimm { + u8 close_pg; + u8 bank_xor_enable; + u8 fine_grain_bank; + u8 rowbits; + u8 colbits; + } dimms[NUM_DIMMS]; + } chan[NUM_CHANNELS]; + } imc[NUM_IMC]; +}; +static int skx_num_sockets; + +struct skx_pvt { + struct skx_imc *imc; +}; + +struct decoded_addr { + struct skx_dev *dev; + u64 addr; + int socket; + int imc; + int channel; + u64 chan_addr; + int sktways; + int chanways; + int dimm; + int rank; + int channel_rank; + u64 rank_address; + int row; + int column; + int bank_address; + int bank_group; +}; + +static struct skx_dev *get_skx_dev(u8 bus, u8 idx) +{ + struct skx_dev *d; + + list_for_each_entry(d, &skx_edac_list, list) { + if (d->bus[idx] == bus) + return d; + } + + return NULL; +} + +enum munittype { + CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD +}; + +struct munit { + u16 did; + u16 devfn[NUM_IMC]; + u8 busidx; + u8 per_socket; + enum munittype mtype; +}; + +/* + * List of PCI device ids that we need together with some device + * number and function numbers to tell which memory controller the + * device belongs to. + */ +static const struct munit skx_all_munits[] = { + { 0x2054, { }, 1, 1, SAD_ALL }, + { 0x2055, { }, 1, 1, UTIL_ALL }, + { 0x2040, { PCI_DEVFN(10, 0), PCI_DEVFN(12, 0) }, 2, 2, CHAN0 }, + { 0x2044, { PCI_DEVFN(10, 4), PCI_DEVFN(12, 4) }, 2, 2, CHAN1 }, + { 0x2048, { PCI_DEVFN(11, 0), PCI_DEVFN(13, 0) }, 2, 2, CHAN2 }, + { 0x208e, { }, 1, 0, SAD }, + { } +}; + +/* + * We use the per-socket device 0x2016 to count how many sockets are present, + * and to detemine which PCI buses are associated with each socket. Allocate + * and build the full list of all the skx_dev structures that we need here. + */ +static int get_all_bus_mappings(void) +{ + struct pci_dev *pdev, *prev; + struct skx_dev *d; + u32 reg; + int ndev = 0; + + prev = NULL; + for (;;) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2016, prev); + if (!pdev) + break; + ndev++; + d = kzalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + pci_dev_put(pdev); + return -ENOMEM; + } + pci_read_config_dword(pdev, 0xCC, ®); + d->bus[0] = GET_BITFIELD(reg, 0, 7); + d->bus[1] = GET_BITFIELD(reg, 8, 15); + d->bus[2] = GET_BITFIELD(reg, 16, 23); + d->bus[3] = GET_BITFIELD(reg, 24, 31); + edac_dbg(2, "busses: %x, %x, %x, %x\n", + d->bus[0], d->bus[1], d->bus[2], d->bus[3]); + list_add_tail(&d->list, &skx_edac_list); + skx_num_sockets++; + prev = pdev; + } + + return ndev; +} + +static int get_all_munits(const struct munit *m) +{ + struct pci_dev *pdev, *prev; + struct skx_dev *d; + u32 reg; + int i = 0, ndev = 0; + + prev = NULL; + for (;;) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, m->did, prev); + if (!pdev) + break; + ndev++; + if (m->per_socket == NUM_IMC) { + for (i = 0; i < NUM_IMC; i++) + if (m->devfn[i] == pdev->devfn) + break; + if (i == NUM_IMC) + goto fail; + } + d = get_skx_dev(pdev->bus->number, m->busidx); + if (!d) + goto fail; + + /* Be sure that the device is enabled */ + if (unlikely(pci_enable_device(pdev) < 0)) { + skx_printk(KERN_ERR, + "Couldn't enable %04x:%04x\n", PCI_VENDOR_ID_INTEL, m->did); + goto fail; + } + + switch (m->mtype) { + case CHAN0: case CHAN1: case CHAN2: + pci_dev_get(pdev); + d->imc[i].chan[m->mtype].cdev = pdev; + break; + case SAD_ALL: + pci_dev_get(pdev); + d->sad_all = pdev; + break; + case UTIL_ALL: + pci_dev_get(pdev); + d->util_all = pdev; + break; + case SAD: + /* + * one of these devices per core, including cores + * that don't exist on this SKU. Ignore any that + * read a route table of zero, make sure all the + * non-zero values match. + */ + pci_read_config_dword(pdev, 0xB4, ®); + if (reg != 0) { + if (d->mcroute == 0) + d->mcroute = reg; + else if (d->mcroute != reg) { + skx_printk(KERN_ERR, + "mcroute mismatch\n"); + goto fail; + } + } + ndev--; + break; + } + + prev = pdev; + } + + return ndev; +fail: + pci_dev_put(pdev); + return -ENODEV; +} + +const struct x86_cpu_id skx_cpuids[] = { + { X86_VENDOR_INTEL, 6, 0x55, 0, 0 }, /* Skylake */ + { } +}; +MODULE_DEVICE_TABLE(x86cpu, skx_cpuids); + +static u8 get_src_id(struct skx_dev *d) +{ + u32 reg; + + pci_read_config_dword(d->util_all, 0xF0, ®); + + return GET_BITFIELD(reg, 12, 14); +} + +static u8 skx_get_node_id(struct skx_dev *d) +{ + u32 reg; + + pci_read_config_dword(d->util_all, 0xF4, ®); + + return GET_BITFIELD(reg, 0, 2); +} + +static int get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval, + int maxval, char *name) +{ + u32 val = GET_BITFIELD(reg, lobit, hibit); + + if (val < minval || val > maxval) { + edac_dbg(2, "bad %s = %d (raw=%x)\n", name, val, reg); + return -EINVAL; + } + return val + add; +} + +#define IS_DIMM_PRESENT(mtr) GET_BITFIELD((mtr), 15, 15) + +#define numrank(reg) get_dimm_attr((reg), 12, 13, 0, 1, 2, "ranks") +#define numrow(reg) get_dimm_attr((reg), 2, 4, 12, 1, 6, "rows") +#define numcol(reg) get_dimm_attr((reg), 0, 1, 10, 0, 2, "cols") + +static int get_width(u32 mtr) +{ + switch (GET_BITFIELD(mtr, 8, 9)) { + case 0: + return DEV_X4; + case 1: + return DEV_X8; + case 2: + return DEV_X16; + } + return DEV_UNKNOWN; +} + +static int skx_get_hi_lo(void) +{ + struct pci_dev *pdev; + u32 reg; + + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2034, NULL); + if (!pdev) { + edac_dbg(0, "Can't get tolm/tohm\n"); + return -ENODEV; + } + + pci_read_config_dword(pdev, 0xD0, ®); + skx_tolm = reg; + pci_read_config_dword(pdev, 0xD4, ®); + skx_tohm = reg; + pci_read_config_dword(pdev, 0xD8, ®); + skx_tohm |= (u64)reg << 32; + + pci_dev_put(pdev); + edac_dbg(2, "tolm=%llx tohm=%llx\n", skx_tolm, skx_tohm); + + return 0; +} + +static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm, + struct skx_imc *imc, int chan, int dimmno) +{ + int banks = 16, ranks, rows, cols, npages; + u64 size; + + if (!IS_DIMM_PRESENT(mtr)) + return 0; + ranks = numrank(mtr); + rows = numrow(mtr); + cols = numcol(mtr); + + /* + * Compute size in 8-byte (2^3) words, then shift to MiB (2^20) + */ + size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3); + npages = MiB_TO_PAGES(size); + + edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", + imc->mc, chan, dimmno, size, npages, + banks, ranks, rows, cols); + + imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mtr, 0, 0); + imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mtr, 9, 9); + imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0); + imc->chan[chan].dimms[dimmno].rowbits = rows; + imc->chan[chan].dimms[dimmno].colbits = cols; + + dimm->nr_pages = npages; + dimm->grain = 32; + dimm->dtype = get_width(mtr); + dimm->mtype = MEM_DDR4; + dimm->edac_mode = EDAC_SECDED; /* likely better than this */ + snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", + imc->src_id, imc->lmc, chan, dimmno); + + return 1; +} + +#define SKX_GET_MTMTR(dev, reg) \ + pci_read_config_dword((dev), 0x87c, ®) + +static bool skx_check_ecc(struct pci_dev *pdev) +{ + u32 mtmtr; + + SKX_GET_MTMTR(pdev, mtmtr); + + return !!GET_BITFIELD(mtmtr, 2, 2); +} + +static int skx_get_dimm_config(struct mem_ctl_info *mci) +{ + struct skx_pvt *pvt = mci->pvt_info; + struct skx_imc *imc = pvt->imc; + struct dimm_info *dimm; + int i, j; + u32 mtr, amap; + int ndimms; + + for (i = 0; i < NUM_CHANNELS; i++) { + ndimms = 0; + pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap); + for (j = 0; j < NUM_DIMMS; j++) { + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, i, j, 0); + pci_read_config_dword(imc->chan[i].cdev, + 0x80 + 4*j, &mtr); + ndimms += get_dimm_info(mtr, amap, dimm, imc, i, j); + } + if (ndimms && !skx_check_ecc(imc->chan[0].cdev)) { + skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc); + return -ENODEV; + } + } + + return 0; +} + +static void skx_unregister_mci(struct skx_imc *imc) +{ + struct mem_ctl_info *mci = imc->mci; + + if (!mci) + return; + + edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci); + + /* Remove MC sysfs nodes */ + edac_mc_del_mc(mci->pdev); + + edac_dbg(1, "%s: free mci struct\n", mci->ctl_name); + kfree(mci->ctl_name); + edac_mc_free(mci); +} + +static int skx_register_mci(struct skx_imc *imc) +{ + struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; + struct pci_dev *pdev = imc->chan[0].cdev; + struct skx_pvt *pvt; + int rc; + + /* allocate a new MC control structure */ + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = NUM_CHANNELS; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = NUM_DIMMS; + layers[1].is_virt_csrow = true; + mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers, + sizeof(struct skx_pvt)); + + if (unlikely(!mci)) + return -ENOMEM; + + edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci); + + /* Associate skx_dev and mci for future usage */ + imc->mci = mci; + pvt = mci->pvt_info; + pvt->imc = imc; + + mci->ctl_name = kasprintf(GFP_KERNEL, "Skylake Socket#%d IMC#%d", + imc->node_id, imc->lmc); + mci->mtype_cap = MEM_FLAG_DDR4; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "skx_edac.c"; + mci->dev_name = pci_name(imc->chan[0].cdev); + mci->mod_ver = SKX_REVISION; + mci->ctl_page_to_phys = NULL; + + rc = skx_get_dimm_config(mci); + if (rc < 0) + goto fail; + + /* record ptr to the generic device */ + mci->pdev = &pdev->dev; + + /* add this new MC control structure to EDAC's list of MCs */ + if (unlikely(edac_mc_add_mc(mci))) { + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); + rc = -EINVAL; + goto fail; + } + + return 0; + +fail: + kfree(mci->ctl_name); + edac_mc_free(mci); + imc->mci = NULL; + return rc; +} + +#define SKX_MAX_SAD 24 + +#define SKX_GET_SAD(d, i, reg) \ + pci_read_config_dword((d)->sad_all, 0x60 + 8 * (i), ®) +#define SKX_GET_ILV(d, i, reg) \ + pci_read_config_dword((d)->sad_all, 0x64 + 8 * (i), ®) + +#define SKX_SAD_MOD3MODE(sad) GET_BITFIELD((sad), 30, 31) +#define SKX_SAD_MOD3(sad) GET_BITFIELD((sad), 27, 27) +#define SKX_SAD_LIMIT(sad) (((u64)GET_BITFIELD((sad), 7, 26) << 26) | MASK26) +#define SKX_SAD_MOD3ASMOD2(sad) GET_BITFIELD((sad), 5, 6) +#define SKX_SAD_ATTR(sad) GET_BITFIELD((sad), 3, 4) +#define SKX_SAD_INTERLEAVE(sad) GET_BITFIELD((sad), 1, 2) +#define SKX_SAD_ENABLE(sad) GET_BITFIELD((sad), 0, 0) + +#define SKX_ILV_REMOTE(tgt) (((tgt) & 8) == 0) +#define SKX_ILV_TARGET(tgt) ((tgt) & 7) + +static bool skx_sad_decode(struct decoded_addr *res) +{ + struct skx_dev *d = list_first_entry(&skx_edac_list, typeof(*d), list); + u64 addr = res->addr; + int i, idx, tgt, lchan, shift; + u32 sad, ilv; + u64 limit, prev_limit; + int remote = 0; + + /* Simple sanity check for I/O space or out of range */ + if (addr >= skx_tohm || (addr >= skx_tolm && addr < BIT_ULL(32))) { + edac_dbg(0, "Address %llx out of range\n", addr); + return false; + } + +restart: + prev_limit = 0; + for (i = 0; i < SKX_MAX_SAD; i++) { + SKX_GET_SAD(d, i, sad); + limit = SKX_SAD_LIMIT(sad); + if (SKX_SAD_ENABLE(sad)) { + if (addr >= prev_limit && addr <= limit) + goto sad_found; + } + prev_limit = limit + 1; + } + edac_dbg(0, "No SAD entry for %llx\n", addr); + return false; + +sad_found: + SKX_GET_ILV(d, i, ilv); + + switch (SKX_SAD_INTERLEAVE(sad)) { + case 0: + idx = GET_BITFIELD(addr, 6, 8); + break; + case 1: + idx = GET_BITFIELD(addr, 8, 10); + break; + case 2: + idx = GET_BITFIELD(addr, 12, 14); + break; + case 3: + idx = GET_BITFIELD(addr, 30, 32); + break; + } + + tgt = GET_BITFIELD(ilv, 4 * idx, 4 * idx + 3); + + /* If point to another node, find it and start over */ + if (SKX_ILV_REMOTE(tgt)) { + if (remote) { + edac_dbg(0, "Double remote!\n"); + return false; + } + remote = 1; + list_for_each_entry(d, &skx_edac_list, list) { + if (d->imc[0].src_id == SKX_ILV_TARGET(tgt)) + goto restart; + } + edac_dbg(0, "Can't find node %d\n", SKX_ILV_TARGET(tgt)); + return false; + } + + if (SKX_SAD_MOD3(sad) == 0) + lchan = SKX_ILV_TARGET(tgt); + else { + switch (SKX_SAD_MOD3MODE(sad)) { + case 0: + shift = 6; + break; + case 1: + shift = 8; + break; + case 2: + shift = 12; + break; + default: + edac_dbg(0, "illegal mod3mode\n"); + return false; + } + switch (SKX_SAD_MOD3ASMOD2(sad)) { + case 0: + lchan = (addr >> shift) % 3; + break; + case 1: + lchan = (addr >> shift) % 2; + break; + case 2: + lchan = (addr >> shift) % 2; + lchan = (lchan << 1) | ~lchan; + break; + case 3: + lchan = ((addr >> shift) % 2) << 1; + break; + } + lchan = (lchan << 1) | (SKX_ILV_TARGET(tgt) & 1); + } + + res->dev = d; + res->socket = d->imc[0].src_id; + res->imc = GET_BITFIELD(d->mcroute, lchan * 3, lchan * 3 + 2); + res->channel = GET_BITFIELD(d->mcroute, lchan * 2 + 18, lchan * 2 + 19); + + edac_dbg(2, "%llx: socket=%d imc=%d channel=%d\n", + res->addr, res->socket, res->imc, res->channel); + return true; +} + +#define SKX_MAX_TAD 8 + +#define SKX_GET_TADBASE(d, mc, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x850 + 4 * (i), ®) +#define SKX_GET_TADWAYNESS(d, mc, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x880 + 4 * (i), ®) +#define SKX_GET_TADCHNILVOFFSET(d, mc, ch, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[ch].cdev, 0x90 + 4 * (i), ®) + +#define SKX_TAD_BASE(b) ((u64)GET_BITFIELD((b), 12, 31) << 26) +#define SKX_TAD_SKT_GRAN(b) GET_BITFIELD((b), 4, 5) +#define SKX_TAD_CHN_GRAN(b) GET_BITFIELD((b), 6, 7) +#define SKX_TAD_LIMIT(b) (((u64)GET_BITFIELD((b), 12, 31) << 26) | MASK26) +#define SKX_TAD_OFFSET(b) ((u64)GET_BITFIELD((b), 4, 23) << 26) +#define SKX_TAD_SKTWAYS(b) (1 << GET_BITFIELD((b), 10, 11)) +#define SKX_TAD_CHNWAYS(b) (GET_BITFIELD((b), 8, 9) + 1) + +/* which bit used for both socket and channel interleave */ +static int skx_granularity[] = { 6, 8, 12, 30 }; + +static u64 skx_do_interleave(u64 addr, int shift, int ways, u64 lowbits) +{ + addr >>= shift; + addr /= ways; + addr <<= shift; + + return addr | (lowbits & ((1ull << shift) - 1)); +} + +static bool skx_tad_decode(struct decoded_addr *res) +{ + int i; + u32 base, wayness, chnilvoffset; + int skt_interleave_bit, chn_interleave_bit; + u64 channel_addr; + + for (i = 0; i < SKX_MAX_TAD; i++) { + SKX_GET_TADBASE(res->dev, res->imc, i, base); + SKX_GET_TADWAYNESS(res->dev, res->imc, i, wayness); + if (SKX_TAD_BASE(base) <= res->addr && res->addr <= SKX_TAD_LIMIT(wayness)) + goto tad_found; + } + edac_dbg(0, "No TAD entry for %llx\n", res->addr); + return false; + +tad_found: + res->sktways = SKX_TAD_SKTWAYS(wayness); + res->chanways = SKX_TAD_CHNWAYS(wayness); + skt_interleave_bit = skx_granularity[SKX_TAD_SKT_GRAN(base)]; + chn_interleave_bit = skx_granularity[SKX_TAD_CHN_GRAN(base)]; + + SKX_GET_TADCHNILVOFFSET(res->dev, res->imc, res->channel, i, chnilvoffset); + channel_addr = res->addr - SKX_TAD_OFFSET(chnilvoffset); + + if (res->chanways == 3 && skt_interleave_bit > chn_interleave_bit) { + /* Must handle channel first, then socket */ + channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit, + res->chanways, channel_addr); + channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit, + res->sktways, channel_addr); + } else { + /* Handle socket then channel. Preserve low bits from original address */ + channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit, + res->sktways, res->addr); + channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit, + res->chanways, res->addr); + } + + res->chan_addr = channel_addr; + + edac_dbg(2, "%llx: chan_addr=%llx sktways=%d chanways=%d\n", + res->addr, res->chan_addr, res->sktways, res->chanways); + return true; +} + +#define SKX_MAX_RIR 4 + +#define SKX_GET_RIRWAYNESS(d, mc, ch, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[ch].cdev, \ + 0x108 + 4 * (i), ®) +#define SKX_GET_RIRILV(d, mc, ch, idx, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[ch].cdev, \ + 0x120 + 16 * idx + 4 * (i), ®) + +#define SKX_RIR_VALID(b) GET_BITFIELD((b), 31, 31) +#define SKX_RIR_LIMIT(b) (((u64)GET_BITFIELD((b), 1, 11) << 29) | MASK29) +#define SKX_RIR_WAYS(b) (1 << GET_BITFIELD((b), 28, 29)) +#define SKX_RIR_CHAN_RANK(b) GET_BITFIELD((b), 16, 19) +#define SKX_RIR_OFFSET(b) ((u64)(GET_BITFIELD((b), 2, 15) << 26)) + +static bool skx_rir_decode(struct decoded_addr *res) +{ + int i, idx, chan_rank; + int shift; + u32 rirway, rirlv; + u64 rank_addr, prev_limit = 0, limit; + + if (res->dev->imc[res->imc].chan[res->channel].dimms[0].close_pg) + shift = 6; + else + shift = 13; + + for (i = 0; i < SKX_MAX_RIR; i++) { + SKX_GET_RIRWAYNESS(res->dev, res->imc, res->channel, i, rirway); + limit = SKX_RIR_LIMIT(rirway); + if (SKX_RIR_VALID(rirway)) { + if (prev_limit <= res->chan_addr && + res->chan_addr <= limit) + goto rir_found; + } + prev_limit = limit; + } + edac_dbg(0, "No RIR entry for %llx\n", res->addr); + return false; + +rir_found: + rank_addr = res->chan_addr >> shift; + rank_addr /= SKX_RIR_WAYS(rirway); + rank_addr <<= shift; + rank_addr |= res->chan_addr & GENMASK_ULL(shift - 1, 0); + + res->rank_address = rank_addr; + idx = (res->chan_addr >> shift) % SKX_RIR_WAYS(rirway); + + SKX_GET_RIRILV(res->dev, res->imc, res->channel, idx, i, rirlv); + res->rank_address = rank_addr - SKX_RIR_OFFSET(rirlv); + chan_rank = SKX_RIR_CHAN_RANK(rirlv); + res->channel_rank = chan_rank; + res->dimm = chan_rank / 4; + res->rank = chan_rank % 4; + + edac_dbg(2, "%llx: dimm=%d rank=%d chan_rank=%d rank_addr=%llx\n", + res->addr, res->dimm, res->rank, + res->channel_rank, res->rank_address); + return true; +} + +static u8 skx_close_row[] = { + 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33 +}; +static u8 skx_close_column[] = { + 3, 4, 5, 14, 19, 23, 24, 25, 26, 27 +}; +static u8 skx_open_row[] = { + 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33 +}; +static u8 skx_open_column[] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 +}; +static u8 skx_open_fine_column[] = { + 3, 4, 5, 7, 8, 9, 10, 11, 12, 13 +}; + +static int skx_bits(u64 addr, int nbits, u8 *bits) +{ + int i, res = 0; + + for (i = 0; i < nbits; i++) + res |= ((addr >> bits[i]) & 1) << i; + return res; +} + +static int skx_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1) +{ + int ret = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1); + + if (do_xor) + ret ^= GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1); + + return ret; +} + +static bool skx_mad_decode(struct decoded_addr *r) +{ + struct skx_dimm *dimm = &r->dev->imc[r->imc].chan[r->channel].dimms[r->dimm]; + int bg0 = dimm->fine_grain_bank ? 6 : 13; + + if (dimm->close_pg) { + r->row = skx_bits(r->rank_address, dimm->rowbits, skx_close_row); + r->column = skx_bits(r->rank_address, dimm->colbits, skx_close_column); + r->column |= 0x400; /* C10 is autoprecharge, always set */ + r->bank_address = skx_bank_bits(r->rank_address, 8, 9, dimm->bank_xor_enable, 22, 28); + r->bank_group = skx_bank_bits(r->rank_address, 6, 7, dimm->bank_xor_enable, 20, 21); + } else { + r->row = skx_bits(r->rank_address, dimm->rowbits, skx_open_row); + if (dimm->fine_grain_bank) + r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_fine_column); + else + r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_column); + r->bank_address = skx_bank_bits(r->rank_address, 18, 19, dimm->bank_xor_enable, 22, 23); + r->bank_group = skx_bank_bits(r->rank_address, bg0, 17, dimm->bank_xor_enable, 20, 21); + } + r->row &= (1u << dimm->rowbits) - 1; + + edac_dbg(2, "%llx: row=%x col=%x bank_addr=%d bank_group=%d\n", + r->addr, r->row, r->column, r->bank_address, + r->bank_group); + return true; +} + +static bool skx_decode(struct decoded_addr *res) +{ + + return skx_sad_decode(res) && skx_tad_decode(res) && + skx_rir_decode(res) && skx_mad_decode(res); +} + +#ifdef CONFIG_EDAC_DEBUG +/* + * Debug feature. Make /sys/kernel/debug/skx_edac_test/addr. + * Write an address to this file to exercise the address decode + * logic in this driver. + */ +static struct dentry *skx_test; +static u64 skx_fake_addr; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct decoded_addr res; + + res.addr = val; + skx_decode(&res); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +static struct dentry *mydebugfs_create(const char *name, umode_t mode, + struct dentry *parent, u64 *value) +{ + return debugfs_create_file(name, mode, parent, value, &fops_u64_wo); +} + +static void setup_skx_debug(void) +{ + skx_test = debugfs_create_dir("skx_edac_test", NULL); + mydebugfs_create("addr", S_IWUSR, skx_test, &skx_fake_addr); +} + +static void teardown_skx_debug(void) +{ + debugfs_remove_recursive(skx_test); +} +#else +static void setup_skx_debug(void) +{ +} + +static void teardown_skx_debug(void) +{ +} +#endif /*CONFIG_EDAC_DEBUG*/ + +static void skx_mce_output_error(struct mem_ctl_info *mci, + const struct mce *m, + struct decoded_addr *res) +{ + enum hw_event_mc_err_type tp_event; + char *type, *optype, msg[256]; + bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); + bool overflow = GET_BITFIELD(m->status, 62, 62); + bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); + bool recoverable; + u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); + u32 mscod = GET_BITFIELD(m->status, 16, 31); + u32 errcode = GET_BITFIELD(m->status, 0, 15); + u32 optypenum = GET_BITFIELD(m->status, 4, 6); + + recoverable = GET_BITFIELD(m->status, 56, 56); + + if (uncorrected_error) { + if (ripv) { + type = "FATAL"; + tp_event = HW_EVENT_ERR_FATAL; + } else { + type = "NON_FATAL"; + tp_event = HW_EVENT_ERR_UNCORRECTED; + } + } else { + type = "CORRECTED"; + tp_event = HW_EVENT_ERR_CORRECTED; + } + + /* + * According with Table 15-9 of the Intel Architecture spec vol 3A, + * memory errors should fit in this mask: + * 000f 0000 1mmm cccc (binary) + * where: + * f = Correction Report Filtering Bit. If 1, subsequent errors + * won't be shown + * mmm = error type + * cccc = channel + * If the mask doesn't match, report an error to the parsing logic + */ + if (!((errcode & 0xef80) == 0x80)) { + optype = "Can't parse: it is not a mem"; + } else { + switch (optypenum) { + case 0: + optype = "generic undef request error"; + break; + case 1: + optype = "memory read error"; + break; + case 2: + optype = "memory write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "memory scrubbing error"; + break; + default: + optype = "reserved"; + break; + } + } + + snprintf(msg, sizeof(msg), + "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x", + overflow ? " OVERFLOW" : "", + (uncorrected_error && recoverable) ? " recoverable" : "", + mscod, errcode, + res->socket, res->imc, res->rank, + res->bank_group, res->bank_address, res->row, res->column); + + edac_dbg(0, "%s\n", msg); + + /* Call the helper to output message */ + edac_mc_handle_error(tp_event, mci, core_err_cnt, + m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, + res->channel, res->dimm, -1, + optype, msg); +} + +static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct mce *mce = (struct mce *)data; + struct decoded_addr res; + struct mem_ctl_info *mci; + char *type; + + if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + return NOTIFY_DONE; + + /* ignore unless this is memory related with an address */ + if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) + return NOTIFY_DONE; + + res.addr = mce->addr; + if (!skx_decode(&res)) + return NOTIFY_DONE; + mci = res.dev->imc[res.imc].mci; + + if (mce->mcgstatus & MCG_STATUS_MCIP) + type = "Exception"; + else + type = "Event"; + + skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n"); + + skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx " + "Bank %d: %016Lx\n", mce->extcpu, type, + mce->mcgstatus, mce->bank, mce->status); + skx_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc); + skx_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr); + skx_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc); + + skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET " + "%u APIC %x\n", mce->cpuvendor, mce->cpuid, + mce->time, mce->socketid, mce->apicid); + + skx_mce_output_error(mci, mce, &res); + + return NOTIFY_DONE; +} + +static struct notifier_block skx_mce_dec = { + .notifier_call = skx_mce_check_error, +}; + +static void skx_remove(void) +{ + int i, j; + struct skx_dev *d, *tmp; + + edac_dbg(0, "\n"); + + list_for_each_entry_safe(d, tmp, &skx_edac_list, list) { + list_del(&d->list); + for (i = 0; i < NUM_IMC; i++) { + skx_unregister_mci(&d->imc[i]); + for (j = 0; j < NUM_CHANNELS; j++) + pci_dev_put(d->imc[i].chan[j].cdev); + } + pci_dev_put(d->util_all); + pci_dev_put(d->sad_all); + + kfree(d); + } +} + +/* + * skx_init: + * make sure we are running on the correct cpu model + * search for all the devices we need + * check which DIMMs are present. + */ +int __init skx_init(void) +{ + const struct x86_cpu_id *id; + const struct munit *m; + int rc = 0, i; + u8 mc = 0, src_id, node_id; + struct skx_dev *d; + + edac_dbg(2, "\n"); + + id = x86_match_cpu(skx_cpuids); + if (!id) + return -ENODEV; + + rc = skx_get_hi_lo(); + if (rc) + return rc; + + rc = get_all_bus_mappings(); + if (rc < 0) + goto fail; + if (rc == 0) { + edac_dbg(2, "No memory controllers found\n"); + return -ENODEV; + } + + for (m = skx_all_munits; m->did; m++) { + rc = get_all_munits(m); + if (rc < 0) + goto fail; + if (rc != m->per_socket * skx_num_sockets) { + edac_dbg(2, "Expected %d, got %d of %x\n", + m->per_socket * skx_num_sockets, rc, m->did); + rc = -ENODEV; + goto fail; + } + } + + list_for_each_entry(d, &skx_edac_list, list) { + src_id = get_src_id(d); + node_id = skx_get_node_id(d); + edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id); + for (i = 0; i < NUM_IMC; i++) { + d->imc[i].mc = mc++; + d->imc[i].lmc = i; + d->imc[i].src_id = src_id; + d->imc[i].node_id = node_id; + rc = skx_register_mci(&d->imc[i]); + if (rc < 0) + goto fail; + } + } + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + setup_skx_debug(); + + mce_register_decode_chain(&skx_mce_dec); + + return 0; +fail: + skx_remove(); + return rc; +} + +static void __exit skx_exit(void) +{ + edac_dbg(2, "\n"); + mce_unregister_decode_chain(&skx_mce_dec); + skx_remove(); + teardown_skx_debug(); +} + +module_init(skx_init); +module_exit(skx_exit); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tony Luck"); +MODULE_DESCRIPTION("MC Driver for Intel Skylake server processors"); -- cgit v0.10.2 From 90c43ec6997a892448f1f86180a515f59cafd8a3 Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Wed, 17 Aug 2016 17:43:00 +0530 Subject: iio: adc: ti_am335x_adc: Protect FIFO1 from concurrent access It is possible that two or more ADC channels can be simultaneously requested for raw samples, in which case there can be race in access to FIFO data resulting in loss of samples. If am335x_tsc_se_set_once() is called again from tiadc_read_raw(), when ADC is still acquired to sample one of the channels, the second process might be put into uninterruptible sleep state. Fix these issues, by protecting FIFO access and channel configurations with a mutex. Since tiadc_read_raw() might take anywhere between few microseconds to few milliseconds to finish execution (depending on averaging and delay values supplied via DT), its better to use mutex instead of spinlock. Fixes: 7ca6740cd1cd4 ("mfd: input: iio: ti_amm335x: Rework TSC/ADC synchronization") Signed-off-by: Vignesh R Cc: Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index 8a36875..bed9977 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -32,6 +32,7 @@ struct tiadc_device { struct ti_tscadc_dev *mfd_tscadc; + struct mutex fifo1_lock; /* to protect fifo access */ int channels; u8 channel_line[8]; u8 channel_step[8]; @@ -359,6 +360,7 @@ static int tiadc_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct tiadc_device *adc_dev = iio_priv(indio_dev); + int ret = IIO_VAL_INT; int i, map_val; unsigned int fifo1count, read, stepid; bool found = false; @@ -372,6 +374,7 @@ static int tiadc_read_raw(struct iio_dev *indio_dev, if (!step_en) return -EINVAL; + mutex_lock(&adc_dev->fifo1_lock); fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT); while (fifo1count--) tiadc_readl(adc_dev, REG_FIFO1); @@ -388,7 +391,8 @@ static int tiadc_read_raw(struct iio_dev *indio_dev, if (time_after(jiffies, timeout)) { am335x_tsc_se_adc_done(adc_dev->mfd_tscadc); - return -EAGAIN; + ret = -EAGAIN; + goto err_unlock; } } map_val = adc_dev->channel_step[chan->scan_index]; @@ -414,8 +418,11 @@ static int tiadc_read_raw(struct iio_dev *indio_dev, am335x_tsc_se_adc_done(adc_dev->mfd_tscadc); if (found == false) - return -EBUSY; - return IIO_VAL_INT; + ret = -EBUSY; + +err_unlock: + mutex_unlock(&adc_dev->fifo1_lock); + return ret; } static const struct iio_info tiadc_info = { @@ -483,6 +490,7 @@ static int tiadc_probe(struct platform_device *pdev) tiadc_step_config(indio_dev); tiadc_writel(adc_dev, REG_FIFO1THR, FIFO1_THRESHOLD); + mutex_init(&adc_dev->fifo1_lock); err = tiadc_channel_init(indio_dev, adc_dev->channels); if (err < 0) -- cgit v0.10.2 From 7175cce1c3f1d8c8840d2004f78f96a3904249b5 Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Wed, 17 Aug 2016 17:43:01 +0530 Subject: iio: adc: ti_am335x_adc: Increase timeout value waiting for ADC sample Now that open delay and sample delay for each channel is configurable via DT, the default IDLE_TIMEOUT value is not enough as this is calculated based on hardcoded macros. This results in driver returning EBUSY sometimes. Fix this by increasing the timeout value based on maximum value possible to open delay and sample delays for each channel. Fixes: 5dc11e810676e ("iio: adc: ti_am335x_adc: make sample delay, open delay, averaging DT parameters") Signed-off-by: Vignesh R Acked-by: Lee Jones Cc: Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index bed9977..c3cfacc 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -381,7 +381,7 @@ static int tiadc_read_raw(struct iio_dev *indio_dev, am335x_tsc_se_set_once(adc_dev->mfd_tscadc, step_en); - timeout = jiffies + usecs_to_jiffies + timeout = jiffies + msecs_to_jiffies (IDLE_TIMEOUT * adc_dev->channels); /* Wait for Fifo threshold interrupt */ while (1) { diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index 2567a87..7f55b8b 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -138,16 +138,16 @@ /* * time in us for processing a single channel, calculated as follows: * - * num cycles = open delay + (sample delay + conv time) * averaging + * max num cycles = open delay + (sample delay + conv time) * averaging * - * num cycles: 152 + (1 + 13) * 16 = 376 + * max num cycles: 262143 + (255 + 13) * 16 = 266431 * * clock frequency: 26MHz / 8 = 3.25MHz * clock period: 1 / 3.25MHz = 308ns * - * processing time: 376 * 308ns = 116us + * max processing time: 266431 * 308ns = 83ms(approx) */ -#define IDLE_TIMEOUT 116 /* microsec */ +#define IDLE_TIMEOUT 83 /* milliseconds */ #define TSCADC_CELLS 2 -- cgit v0.10.2 From 7ac61a062f3147dc23e3f12b9dfe7c4dd35f9cb8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 16 Aug 2016 15:33:28 +0200 Subject: iio: accel: kxsd9: Fix raw read return Any readings from the raw interface of the KXSD9 driver will return an empty string, because it does not return IIO_VAL_INT but rather some random value from the accelerometer to the caller. Cc: stable@vger.kernel.org Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c index 3a9f106..da5fb67 100644 --- a/drivers/iio/accel/kxsd9.c +++ b/drivers/iio/accel/kxsd9.c @@ -160,6 +160,7 @@ static int kxsd9_read_raw(struct iio_dev *indio_dev, if (ret < 0) goto error_ret; *val = ret; + ret = IIO_VAL_INT; break; case IIO_CHAN_INFO_SCALE: ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C)); -- cgit v0.10.2 From d524d84b588e300418a99794eb5066683ec7c488 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 21 Aug 2016 15:24:33 +0100 Subject: net: tehuti: fix typo: "eneble" -> "enable" trivial typo fix in pr_err message Signed-off-by: Colin Ian King Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index 7452b5f..7108c68 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -1987,7 +1987,7 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if ((readl(nic->regs + FPGA_VER) & 0xFFF) >= 378) { err = pci_enable_msi(pdev); if (err) - pr_err("Can't eneble msi. error is %d\n", err); + pr_err("Can't enable msi. error is %d\n", err); else nic->irq_type = IRQ_MSI; } else -- cgit v0.10.2 From fa8410b355251fd30341662a40ac6b22d3e38468 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 21 Aug 2016 16:14:10 -0700 Subject: Linux 4.8-rc3 diff --git a/Makefile b/Makefile index 5c18baa..3537aa2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* -- cgit v0.10.2 From e340eca90ee18547027a6d5745ffabf7d173dfcc Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 14 Aug 2016 13:53:17 -0700 Subject: powerpc: cputhreads: Add missing include file Powerpc builds may fail with the following build error. Error log: In file included from ./arch/powerpc/include/asm/mmu_context.h:11:0, from ./include/linux/mmu_context.h:4, from mm/mmu_context.c:8: ./arch/powerpc/include/asm/cputhreads.h: In function 'get_tensr': ./arch/powerpc/include/asm/cputhreads.h:101:2: error: implicit declaration of function 'cpu_has_feature' The problem can be triggered by configuring ppc64e_defconfig and selecting CONFIG_TICK_CPU_ACCOUNTING instead of CONFIG_VIRT_CPU_ACCOUNTING_NATIVE. Fixes: b92a226e5284 ("powerpc: Move cpu_has_feature() to a separate file") Signed-off-by: Guenter Roeck Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index 666bef4e..9377bdf 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -3,6 +3,7 @@ #ifndef __ASSEMBLY__ #include +#include /* * Mapping of threads to cores -- cgit v0.10.2 From aded76bac54d1dd0a9c9203aa29a78423edc4cde Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 14 Aug 2016 21:42:17 +0200 Subject: drivers/macintosh: Delete owner assignment The field "owner" is set by core. Thus delete an extra initialisation. Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: Markus Elfring Signed-off-by: Benjamin Herrenschmidt diff --git a/drivers/macintosh/ams/ams-i2c.c b/drivers/macintosh/ams/ams-i2c.c index 978eda8..8a3ba56 100644 --- a/drivers/macintosh/ams/ams-i2c.c +++ b/drivers/macintosh/ams/ams-i2c.c @@ -73,7 +73,6 @@ MODULE_DEVICE_TABLE(i2c, ams_id); static struct i2c_driver ams_i2c_driver = { .driver = { .name = "ams", - .owner = THIS_MODULE, }, .probe = ams_i2c_probe, .remove = ams_i2c_remove, diff --git a/drivers/macintosh/windfarm_pm112.c b/drivers/macintosh/windfarm_pm112.c index 3024685..96d16fc 100644 --- a/drivers/macintosh/windfarm_pm112.c +++ b/drivers/macintosh/windfarm_pm112.c @@ -668,7 +668,6 @@ static struct platform_driver wf_pm112_driver = { .remove = wf_pm112_remove, .driver = { .name = "windfarm", - .owner = THIS_MODULE, }, }; diff --git a/drivers/macintosh/windfarm_pm72.c b/drivers/macintosh/windfarm_pm72.c index 2f506b9..e88cfb3 100644 --- a/drivers/macintosh/windfarm_pm72.c +++ b/drivers/macintosh/windfarm_pm72.c @@ -789,7 +789,6 @@ static struct platform_driver wf_pm72_driver = { .remove = wf_pm72_remove, .driver = { .name = "windfarm", - .owner = THIS_MODULE, }, }; diff --git a/drivers/macintosh/windfarm_rm31.c b/drivers/macintosh/windfarm_rm31.c index 82fc86a..bdfcb8a 100644 --- a/drivers/macintosh/windfarm_rm31.c +++ b/drivers/macintosh/windfarm_rm31.c @@ -682,7 +682,6 @@ static struct platform_driver wf_rm31_driver = { .remove = wf_rm31_remove, .driver = { .name = "windfarm", - .owner = THIS_MODULE, }, }; -- cgit v0.10.2 From e72e799c0901b689c83468623cfcb85df431e882 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 15 Aug 2016 22:07:54 +0200 Subject: powerpc/512x: Delete unnecessary assignment for the field "owner" The field "owner" is set by the core. Thus delete an unneeded initialisation. Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: Markus Elfring Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c index 8eb82b0..d93dd4a 100644 --- a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c +++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c @@ -528,7 +528,6 @@ static struct platform_driver mpc512x_lpbfifo_driver = { .remove = mpc512x_lpbfifo_remove, .driver = { .name = DRV_NAME, - .owner = THIS_MODULE, .of_match_table = mpc512x_lpbfifo_match, }, }; -- cgit v0.10.2 From f5ed841ce75ca33f413c394110d6dc002c097b01 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 15 Aug 2016 22:36:05 +0200 Subject: powerpc: mpc8349emitx: Delete unnecessary assignment for the field "owner" The field "owner" is set by the core. Thus delete an unneeded initialisation. Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: Markus Elfring Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index dbcd030..63c5ab64 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -222,7 +222,6 @@ static const struct of_device_id mcu_of_match_table[] = { static struct i2c_driver mcu_driver = { .driver = { .name = "mcu-mpc8349emitx", - .owner = THIS_MODULE, .of_match_table = mcu_of_match_table, }, .probe = mcu_probe, -- cgit v0.10.2 From 6f38a8b9a45833495dc878c335c5431cd98a16ed Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Thu, 18 Aug 2016 17:35:14 +1000 Subject: cxl: use pcibios_free_controller_deferred() when removing vPHBs When cxl removes a vPHB, it's possible that the pci_controller may be freed before all references to the devices on the vPHB have been released. This in turn causes an invalid memory access when the devices are eventually released, as pcibios_release_device() attempts to call the phb's release_device hook. In cxl_pci_vphb_remove(), remove the existing call to pcibios_free_controller(). Instead, use pcibios_free_controller_deferred() to free the pci_controller after all devices have been released. Export pci_set_host_bridge_release() so we can do this. Cc: stable@vger.kernel.org Signed-off-by: Andrew Donnellan Reviewed-by: Matthew R. Ochs Acked-by: Ian Munsie Signed-off-by: Benjamin Herrenschmidt diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 7ada5f1..3519ace 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -230,6 +230,11 @@ int cxl_pci_vphb_add(struct cxl_afu *afu) if (phb->bus == NULL) return -ENXIO; + /* Set release hook on root bus */ + pci_set_host_bridge_release(to_pci_host_bridge(phb->bus->bridge), + pcibios_free_controller_deferred, + (void *) phb); + /* Claim resources. This might need some rework as well depending * whether we are doing probe-only or not, like assigning unassigned * resources etc... @@ -256,7 +261,10 @@ void cxl_pci_vphb_remove(struct cxl_afu *afu) afu->phb = NULL; pci_remove_root_bus(phb->bus); - pcibios_free_controller(phb); + /* + * We don't free phb here - that's handled by + * pcibios_free_controller_deferred() + */ } static bool _cxl_pci_is_vphb_device(struct pci_controller *phb) diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c index 5f4a2e0..add6623 100644 --- a/drivers/pci/host-bridge.c +++ b/drivers/pci/host-bridge.c @@ -44,6 +44,7 @@ void pci_set_host_bridge_release(struct pci_host_bridge *bridge, bridge->release_fn = release_fn; bridge->release_data = release_data; } +EXPORT_SYMBOL_GPL(pci_set_host_bridge_release); void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region, struct resource *res) -- cgit v0.10.2 From 2dd9c11b9d4dfbd6c070eab7b81197f65e82f1a0 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Thu, 11 Aug 2016 17:25:40 -0300 Subject: powerpc/pseries: use pci_host_bridge.release_fn() to kfree(phb) This patch leverages 'struct pci_host_bridge' from the PCI subsystem in order to free the pci_controller only after the last reference to its devices is dropped (avoiding an oops in pcibios_release_device() if the last reference is dropped after pcibios_free_controller()). The patch relies on pci_host_bridge.release_fn() (and .release_data), which is called automatically by the PCI subsystem when the root bus is released (i.e., the last reference is dropped). Those fields are set via pci_set_host_bridge_release() (e.g. in the platform-specific implementation of pcibios_root_bridge_prepare()). It introduces the 'pcibios_free_controller_deferred()' .release_fn() and it expects .release_data to hold a pointer to the pci_controller. The function implictly calls 'pcibios_free_controller()', so an user must *NOT* explicitly call it if using the new _deferred() callback. The functionality is enabled for pseries (although it isn't platform specific, and may be used by cxl). Details on not-so-elegant design choices: - Use 'pci_host_bridge.release_data' field as pointer to associated 'struct pci_controller' so *not* to 'pci_bus_to_host(bridge->bus)' in pcibios_free_controller_deferred(). That's because pci_remove_root_bus() sets 'host_bridge->bus = NULL' (so, if the last reference is released after pci_remove_root_bus() runs, which eventually reaches pcibios_free_controller_deferred(), that would hit a null pointer dereference). The cxl/vphb.c code calls pci_remove_root_bus(), and the cxl folks are interested in this fix. Test-case #1 (hold references) # ls -ld /sys/block/sd* | grep -m1 0021:01:00.0 <...> /sys/block/sdaa -> ../devices/pci0021:01/0021:01:00.0/<...> # ls -ld /sys/block/sd* | grep -m1 0021:01:00.1 <...> /sys/block/sdab -> ../devices/pci0021:01/0021:01:00.1/<...> # cat >/dev/sdaa & pid1=$! # cat >/dev/sdab & pid2=$! # drmgr -w 5 -d 1 -c phb -s 'PHB 33' -r Validating PHB DLPAR capability...yes. [ 594.306719] pci_hp_remove_devices: PCI: Removing devices on bus 0021:01 [ 594.306738] pci_hp_remove_devices: Removing 0021:01:00.0... ... [ 598.236381] pci_hp_remove_devices: Removing 0021:01:00.1... ... [ 611.972077] pci_bus 0021:01: busn_res: [bus 01-ff] is released [ 611.972140] rpadlpar_io: slot PHB 33 removed # kill -9 $pid1 # kill -9 $pid2 [ 632.918088] pcibios_free_controller_deferred: domain 33, dynamic 1 Test-case #2 (don't hold references) # drmgr -w 5 -d 1 -c phb -s 'PHB 33' -r Validating PHB DLPAR capability...yes. [ 916.357363] pci_hp_remove_devices: PCI: Removing devices on bus 0021:01 [ 916.357386] pci_hp_remove_devices: Removing 0021:01:00.0... ... [ 920.566527] pci_hp_remove_devices: Removing 0021:01:00.1... ... [ 933.955873] pci_bus 0021:01: busn_res: [bus 01-ff] is released [ 933.955977] pcibios_free_controller_deferred: domain 33, dynamic 1 [ 933.955999] rpadlpar_io: slot PHB 33 removed Suggested-By: Gavin Shan Signed-off-by: Mauricio Faria de Oliveira Reviewed-by: Gavin Shan Reviewed-by: Andrew Donnellan Tested-by: Andrew Donnellan # cxl Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index b5e88e4..c0309c5 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -301,6 +301,7 @@ extern void pci_process_bridge_OF_ranges(struct pci_controller *hose, /* Allocate & free a PCI host bridge structure */ extern struct pci_controller *pcibios_alloc_controller(struct device_node *dev); extern void pcibios_free_controller(struct pci_controller *phb); +extern void pcibios_free_controller_deferred(struct pci_host_bridge *bridge); #ifdef CONFIG_PCI extern int pcibios_vaddr_is_ioport(void __iomem *address); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 7fdf324..e589080 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -154,6 +154,42 @@ void pcibios_free_controller(struct pci_controller *phb) EXPORT_SYMBOL_GPL(pcibios_free_controller); /* + * This function is used to call pcibios_free_controller() + * in a deferred manner: a callback from the PCI subsystem. + * + * _*DO NOT*_ call pcibios_free_controller() explicitly if + * this is used (or it may access an invalid *phb pointer). + * + * The callback occurs when all references to the root bus + * are dropped (e.g., child buses/devices and their users). + * + * It's called as .release_fn() of 'struct pci_host_bridge' + * which is associated with the 'struct pci_controller.bus' + * (root bus) - it expects .release_data to hold a pointer + * to 'struct pci_controller'. + * + * In order to use it, register .release_fn()/release_data + * like this: + * + * pci_set_host_bridge_release(bridge, + * pcibios_free_controller_deferred + * (void *) phb); + * + * e.g. in the pcibios_root_bridge_prepare() callback from + * pci_create_root_bus(). + */ +void pcibios_free_controller_deferred(struct pci_host_bridge *bridge) +{ + struct pci_controller *phb = (struct pci_controller *) + bridge->release_data; + + pr_debug("domain %d, dynamic %d\n", phb->global_number, phb->is_dynamic); + + pcibios_free_controller(phb); +} +EXPORT_SYMBOL_GPL(pcibios_free_controller_deferred); + +/* * The function is used to return the minimal alignment * for memory or I/O windows of the associated P2P bridge. * By default, 4KiB alignment for I/O windows and 1MiB for diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index fe16a50..09eba5a 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -119,6 +119,10 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) bus = bridge->bus; + /* Rely on the pcibios_free_controller_deferred() callback. */ + pci_set_host_bridge_release(bridge, pcibios_free_controller_deferred, + (void *) pci_bus_to_host(bus)); + dn = pcibios_get_phb_of_node(bus); if (!dn) return 0; diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 906dbaa..547fd13 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -106,8 +106,11 @@ int remove_phb_dynamic(struct pci_controller *phb) release_resource(res); } - /* Free pci_controller data structure */ - pcibios_free_controller(phb); + /* + * The pci_controller data structure is freed by + * the pcibios_free_controller_deferred() callback; + * see pseries_root_bridge_prepare(). + */ return 0; } -- cgit v0.10.2 From 6096481649ac27466cf2b453f185b43f680bf541 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Wed, 17 Aug 2016 12:03:05 +0200 Subject: powerpc/powernv/pci: fix iterator signedness Unsigned type is always non-negative, so the loop could not end in case condition is never true. The problem has been detected using semantic patch scripts/coccinelle/tests/unsigned_lesser_than_zero.cocci Signed-off-by: Andrzej Hajda Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index fd9444f..1321826 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -149,7 +149,7 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) { - unsigned long pe = phb->ioda.total_pe_num - 1; + long pe; for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) { if (!test_and_set_bit(pe, phb->ioda.pe_alloc)) -- cgit v0.10.2 From 8a39b05f086904c3b2e04e4db3d81f30c0eae6ae Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 16 Aug 2016 10:57:34 -0400 Subject: powerpc: migrate exception table users off module.h and onto extable.h These files were only including module.h for exception table related functions. We've now separated that content out into its own file "extable.h" so now move over to that and avoid all the extra header content in module.h that we don't really need to compile these files. Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Paul Gortmaker Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 3ed8ec09..e785cc9 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 2cb5892..62859eb 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -25,7 +25,8 @@ #include #include #include -#include +#include +#include /* print_modules */ #include #include #include diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index a4db22f..bb1ffc5 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index dafba10..dfd3100 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index 80804f9..f97bab8 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 68e7c0d..3cc7cac 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -23,7 +23,7 @@ */ #include -#include +#include #include #include #include -- cgit v0.10.2 From 19ab58d19ef6291bab07c8e7110f3198c23432c1 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 16 Aug 2016 19:50:12 +0800 Subject: powerpc, hotplug: Avoid to touch non-existent cpumasks. We observed a kernel oops when running a PPC guest with config NR_CPUS=4 and qemu option "-smp cores=1,threads=8": [ 30.634781] Unable to handle kernel paging request for data at address 0xc00000014192eb17 [ 30.636173] Faulting instruction address: 0xc00000000003e5cc [ 30.637069] Oops: Kernel access of bad area, sig: 11 [#1] [ 30.637877] SMP NR_CPUS=4 NUMA pSeries [ 30.638471] Modules linked in: [ 30.638949] CPU: 3 PID: 27 Comm: migration/3 Not tainted 4.7.0-07963-g9714b26 #1 [ 30.640059] task: c00000001e29c600 task.stack: c00000001e2a8000 [ 30.640956] NIP: c00000000003e5cc LR: c00000000003e550 CTR: 0000000000000000 [ 30.642001] REGS: c00000001e2ab8e0 TRAP: 0300 Not tainted (4.7.0-07963-g9714b26) [ 30.643139] MSR: 8000000102803033 CR: 22004084 XER: 00000000 [ 30.644583] CFAR: c000000000009e98 DAR: c00000014192eb17 DSISR: 40000000 SOFTE: 0 GPR00: c00000000140a6b8 c00000001e2abb60 c0000000016dd300 0000000000000003 GPR04: 0000000000000000 0000000000000004 c0000000016e5920 0000000000000008 GPR08: 0000000000000004 c00000014192eb17 0000000000000000 0000000000000020 GPR12: c00000000140a6c0 c00000000ffffc00 c0000000000d3ea8 c00000001e005680 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 c00000001e6b3a00 0000000000000000 0000000000000001 GPR24: c00000001ff85138 c00000001ff85130 000000001eb6f000 0000000000000001 GPR28: 0000000000000000 c0000000017014e0 0000000000000000 0000000000000018 [ 30.653882] NIP [c00000000003e5cc] __cpu_disable+0xcc/0x190 [ 30.654713] LR [c00000000003e550] __cpu_disable+0x50/0x190 [ 30.655528] Call Trace: [ 30.655893] [c00000001e2abb60] [c00000000003e550] __cpu_disable+0x50/0x190 (unreliable) [ 30.657280] [c00000001e2abbb0] [c0000000000aca0c] take_cpu_down+0x5c/0x100 [ 30.658365] [c00000001e2abc10] [c000000000163918] multi_cpu_stop+0x1a8/0x1e0 [ 30.659617] [c00000001e2abc60] [c000000000163cc0] cpu_stopper_thread+0xf0/0x1d0 [ 30.660737] [c00000001e2abd20] [c0000000000d8d70] smpboot_thread_fn+0x290/0x2a0 [ 30.661879] [c00000001e2abd80] [c0000000000d3fa8] kthread+0x108/0x130 [ 30.662876] [c00000001e2abe30] [c000000000009968] ret_from_kernel_thread+0x5c/0x74 [ 30.664017] Instruction dump: [ 30.664477] 7bde1f24 38a00000 787f1f24 3b600001 39890008 7d204b78 7d05e214 7d0b07b4 [ 30.665642] 796b1f24 7d26582a 7d204a14 7d29f214 <7d4048a8> 7d4a3878 7d4049ad 40c2fff4 [ 30.666854] ---[ end trace 32643b7195717741 ]--- The reason of this is that in __cpu_disable(), when we try to set the cpu_sibling_mask or cpu_core_mask of the sibling CPUs of the disabled one, we don't check whether the current configuration employs those sibling CPUs(hw threads). And if a CPU is not employed by a configuration, the percpu structures cpu_{sibling,core}_mask are not allocated, therefore accessing those cpumasks will result in problems as above. This patch fixes this problem by adding an addition check on whether the id is no less than nr_cpu_ids in the sibling CPU iteration code. Signed-off-by: Boqun Feng Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 25a3905..9c6f3fd 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -830,7 +830,7 @@ int __cpu_disable(void) /* Update sibling maps */ base = cpu_first_thread_sibling(cpu); - for (i = 0; i < threads_per_core; i++) { + for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) { cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i)); cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu)); cpumask_clear_cpu(cpu, cpu_core_mask(base + i)); -- cgit v0.10.2 From 66443efa83dc73775100b7442962ce2cb0d4472e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 12 Aug 2016 21:45:52 +1000 Subject: powerpc/prom: Fix sub-processor option passed to ibm, client-architecture-support When booting from an OpenFirmware which supports it, we use the "ibm,client-architecture-support" firmware call to communicate our capabilities to firmware. The format of the structure we pass to firmware is specified in PAPR (Power Architecture Platform Requirements), or the public version LoPAPR (Linux on Power Architecture Platform Reference). Referring to table 244 in LoPAPR v1.1, option vector 5 contains a 4 byte field at bytes 17-20 for the "Platform Facilities Enable". This is followed by a 1 byte field at byte 21 for "Sub-Processor Represenation Level". Comparing to the code, there we have the Platform Facilities options (OV5_PFO_*) at byte 17, but we fail to pad that field out to its full width of 4 bytes. This means the OV5_SUB_PROCESSORS option is incorrectly placed at byte 18. Fix it by adding zero bytes for bytes 18, 19, 20, and comment the bytes to hopefully make it clearer in future. As far as I'm aware nothing actually consumes this value at this time, so the effect of this bug is nil in practice. It does mean we've been incorrectly setting bit 15 of the "Platform Facilities Enable" option for the past ~3 1/2 years, so we should avoid allocating that bit to anything else in future. Fixes: df77c7992029 ("powerpc/pseries: Update ibm,architecture.vec for PAPR 2.7/POWER8") Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 4e74fc5..d3eff99 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -695,7 +695,7 @@ unsigned char ibm_architecture_vec[] = { OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */ /* option vector 5: PAPR/OF options */ - VECTOR_LENGTH(18), /* length */ + VECTOR_LENGTH(21), /* length */ 0, /* don't ignore, don't halt */ OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) | OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) | @@ -726,8 +726,11 @@ unsigned char ibm_architecture_vec[] = { 0, 0, OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) | - OV5_FEAT(OV5_PFO_HW_842), - OV5_FEAT(OV5_SUB_PROCESSORS), + OV5_FEAT(OV5_PFO_HW_842), /* Byte 17 */ + 0, /* Byte 18 */ + 0, /* Byte 19 */ + 0, /* Byte 20 */ + OV5_FEAT(OV5_SUB_PROCESSORS), /* Byte 21 */ /* option vector 6: IBM PAPR hints */ VECTOR_LENGTH(3), /* length */ -- cgit v0.10.2 From 3f3b5dc14c25254d3fe98115c71b2a1f3ed97798 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Aug 2016 20:48:42 +1000 Subject: powerpc/pseries: PACA save area fix for general exception vs MCE MCE must not use PACA_EXGEN. When a general exception enables MSR_RI, that means SPRN_SRR[01] and SPRN_SPRG are no longer used. However the PACA save area is still in use. Acked-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index df6d45e..034b7ea 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -969,14 +969,14 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) machine_check_common: mfspr r10,SPRN_DAR - std r10,PACA_EXGEN+EX_DAR(r13) + std r10,PACA_EXMC+EX_DAR(r13) mfspr r10,SPRN_DSISR - stw r10,PACA_EXGEN+EX_DSISR(r13) + stw r10,PACA_EXMC+EX_DSISR(r13) EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) FINISH_NAP RECONCILE_IRQ_STATE(r10, r11) - ld r3,PACA_EXGEN+EX_DAR(r13) - lwz r4,PACA_EXGEN+EX_DSISR(r13) + ld r3,PACA_EXMC+EX_DAR(r13) + lwz r4,PACA_EXMC+EX_DSISR(r13) std r3,_DAR(r1) std r4,_DSISR(r1) bl save_nvgprs -- cgit v0.10.2 From a74599a5041979ae83aec7a6d5c8df9131db2ce6 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Aug 2016 20:48:43 +1000 Subject: powerpc/pseries: PACA save area fix for MCE vs MCE MCE must not enable MSR_RI until PACA_EXMC is no longer being used. Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 034b7ea..bffec73 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -485,7 +485,23 @@ machine_check_fwnmi: EXCEPTION_PROLOG_0(PACA_EXMC) machine_check_pSeries_0: EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST, 0x200) - EXCEPTION_PROLOG_PSERIES_1(machine_check_common, EXC_STD) + /* + * The following is essentially EXCEPTION_PROLOG_PSERIES_1 with the + * difference that MSR_RI is not enabled, because PACA_EXMC is being + * used, so nested machine check corrupts it. machine_check_common + * enables MSR_RI. + */ + ld r12,PACAKBASE(r13) + ld r10,PACAKMSR(r13) + xori r10,r10,MSR_RI + mfspr r11,SPRN_SRR0 + LOAD_HANDLER(r12, machine_check_common) + mtspr SPRN_SRR0,r12 + mfspr r12,SPRN_SRR1 + mtspr SPRN_SRR1,r10 + rfid + b . /* prevent speculative execution */ + KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200) KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300) KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380) @@ -977,6 +993,9 @@ machine_check_common: RECONCILE_IRQ_STATE(r10, r11) ld r3,PACA_EXMC+EX_DAR(r13) lwz r4,PACA_EXMC+EX_DSISR(r13) + /* Enable MSR_RI when finished with PACA_EXMC */ + li r10,MSR_RI + mtmsrd r10,1 std r3,_DAR(r1) std r4,_DSISR(r1) bl save_nvgprs -- cgit v0.10.2 From 41017a7579cf49cb5513e17df1570dc918760079 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 11 Aug 2016 10:50:40 +0200 Subject: powerpc: sysdev: cpm: fix gpio save_regs functions of_mm_gpiochip_add_data() calls mm_gc->save_regs() before setting the data. Therefore ->save_regs() cannot use gpiochip_get_data() [ 0.275940] Unable to handle kernel paging request for data at address 0x00000130 [ 0.283120] Faulting instruction address: 0xc01b44cc [ 0.288175] Oops: Kernel access of bad area, sig: 11 [#1] [ 0.293343] PREEMPT CMPC885 [ 0.296141] CPU: 0 PID: 1 Comm: swapper Not tainted 4.7.0-g65124df-dirty #68 [ 0.304131] task: c6074000 ti: c6080000 task.ti: c6080000 [ 0.309459] NIP: c01b44cc LR: c0011720 CTR: c0011708 [ 0.314372] REGS: c6081d90 TRAP: 0300 Not tainted (4.7.0-g65124df-dirty) [ 0.322267] MSR: 00009032 CR: 24000028 XER: 20000000 [ 0.328813] DAR: 00000130 DSISR: c0000000 GPR00: c01b6d0c c6081e40 c6074000 c6017000 c9028000 c601d028 c6081dd8 00000000 GPR08: c601d028 00000000 ffffffff 00000001 24000044 00000000 c0002790 00000000 GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 c05643b0 00000083 GPR24: c04a1a6c c0560000 c04a8308 c04c6480 c0012498 c6017000 c7ffcc78 c6017000 [ 0.360806] NIP [c01b44cc] gpiochip_get_data+0x4/0xc [ 0.365684] LR [c0011720] cpm1_gpio16_save_regs+0x18/0x44 [ 0.370972] Call Trace: [ 0.373451] [c6081e50] [c01b6d0c] of_mm_gpiochip_add_data+0x70/0xdc [ 0.379624] [c6081e70] [c00124c0] cpm_init_par_io+0x28/0x118 [ 0.385238] [c6081e80] [c04a8ac0] do_one_initcall+0xb0/0x17c [ 0.390819] [c6081ef0] [c04a8cbc] kernel_init_freeable+0x130/0x1dc [ 0.396924] [c6081f30] [c00027a4] kernel_init+0x14/0x110 [ 0.402177] [c6081f40] [c000b424] ret_from_kernel_thread+0x5c/0x64 [ 0.408233] Instruction dump: [ 0.411168] 4182fafc 3f80c040 48234c6d 3bc0fff0 3b9c5ed0 4bfffaf4 81290020 712a0004 [ 0.418825] 4182fb34 48234c51 4bfffb2c 81230004 <80690130> 4e800020 7c0802a6 9421ffe0 [ 0.426763] ---[ end trace fe4113ee21d72ffa ]--- fixes: e65078f1f3490 ("powerpc: sysdev: cpm1: use gpiochip data pointer") fixes: a14a2d484b386 ("powerpc: cpm_common: use gpiochip data pointer") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Reviewed-by: Linus Walleij Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c index 6c11099..81d4947 100644 --- a/arch/powerpc/sysdev/cpm1.c +++ b/arch/powerpc/sysdev/cpm1.c @@ -534,7 +534,8 @@ struct cpm1_gpio16_chip { static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc) { - struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm1_gpio16_chip *cpm1_gc = + container_of(mm_gc, struct cpm1_gpio16_chip, mm_gc); struct cpm_ioport16 __iomem *iop = mm_gc->regs; cpm1_gc->cpdata = in_be16(&iop->dat); @@ -649,7 +650,8 @@ struct cpm1_gpio32_chip { static void cpm1_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc) { - struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm1_gpio32_chip *cpm1_gc = + container_of(mm_gc, struct cpm1_gpio32_chip, mm_gc); struct cpm_ioport32b __iomem *iop = mm_gc->regs; cpm1_gc->cpdata = in_be32(&iop->dat); diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index 911456d..947f420 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -94,7 +94,8 @@ struct cpm2_gpio32_chip { static void cpm2_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc) { - struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm2_gpio32_chip *cpm2_gc = + container_of(mm_gc, struct cpm2_gpio32_chip, mm_gc); struct cpm2_ioports __iomem *iop = mm_gc->regs; cpm2_gc->cpdata = in_be32(&iop->dat); -- cgit v0.10.2 From 7c379526d7e71d2041c8ca0bd9af4888916a14b9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Aug 2016 15:07:43 +0200 Subject: powerpc: move hmi.c to arch/powerpc/kvm/ hmi.c functions are unused unless sibling_subcore_state is nonzero, and that in turn happens only if KVM is in use. So move the code to arch/powerpc/kvm/, putting it under CONFIG_KVM_BOOK3S_HV_POSSIBLE rather than CONFIG_PPC_BOOK3S_64. The sibling_subcore_state is also included in struct paca_struct only if KVM is supported by the kernel. Cc: Daniel Axtens Cc: Michael Ellerman Cc: Mahesh Salgaonkar Cc: Paul Mackerras Cc: linuxppc-dev@lists.ozlabs.org Cc: kvm-ppc@vger.kernel.org Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h index 88b4901..85b7a1a 100644 --- a/arch/powerpc/include/asm/hmi.h +++ b/arch/powerpc/include/asm/hmi.h @@ -21,7 +21,7 @@ #ifndef __ASM_PPC64_HMI_H__ #define __ASM_PPC64_HMI_H__ -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #define CORE_TB_RESYNC_REQ_BIT 63 #define MAX_SUBCORE_PER_CORE 4 diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 148303e7..6a6792b 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -183,11 +183,6 @@ struct paca_struct { */ u16 in_mce; u8 hmi_event_available; /* HMI event is available */ - /* - * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for - * more details - */ - struct sibling_subcore_state *sibling_subcore_state; #endif /* Stuff for accurate time accounting */ @@ -202,6 +197,13 @@ struct paca_struct { struct kvmppc_book3s_shadow_vcpu shadow_vcpu; #endif struct kvmppc_host_state kvm_hstate; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* + * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for + * more details + */ + struct sibling_subcore_state *sibling_subcore_state; +#endif #endif }; diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b2027a5..fe4c075 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -41,7 +41,7 @@ obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o -obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o hmi.o +obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o diff --git a/arch/powerpc/kernel/hmi.c b/arch/powerpc/kernel/hmi.c deleted file mode 100644 index e3f738e..0000000 --- a/arch/powerpc/kernel/hmi.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Hypervisor Maintenance Interrupt (HMI) handling. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. - * - * Copyright 2015 IBM Corporation - * Author: Mahesh Salgaonkar - */ - -#undef DEBUG - -#include -#include -#include -#include - -void wait_for_subcore_guest_exit(void) -{ - int i; - - /* - * NULL bitmap pointer indicates that KVM module hasn't - * been loaded yet and hence no guests are running. - * If no KVM is in use, no need to co-ordinate among threads - * as all of them will always be in host and no one is going - * to modify TB other than the opal hmi handler. - * Hence, just return from here. - */ - if (!local_paca->sibling_subcore_state) - return; - - for (i = 0; i < MAX_SUBCORE_PER_CORE; i++) - while (local_paca->sibling_subcore_state->in_guest[i]) - cpu_relax(); -} - -void wait_for_tb_resync(void) -{ - if (!local_paca->sibling_subcore_state) - return; - - while (test_bit(CORE_TB_RESYNC_REQ_BIT, - &local_paca->sibling_subcore_state->flags)) - cpu_relax(); -} diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 1f9e552..855d4b9 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -78,6 +78,7 @@ kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ + book3s_hv_hmi.o \ book3s_hv_rmhandlers.o \ book3s_hv_rm_mmu.o \ book3s_hv_ras.o \ diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c new file mode 100644 index 0000000..e3f738e --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_hmi.c @@ -0,0 +1,56 @@ +/* + * Hypervisor Maintenance Interrupt (HMI) handling. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. + * + * Copyright 2015 IBM Corporation + * Author: Mahesh Salgaonkar + */ + +#undef DEBUG + +#include +#include +#include +#include + +void wait_for_subcore_guest_exit(void) +{ + int i; + + /* + * NULL bitmap pointer indicates that KVM module hasn't + * been loaded yet and hence no guests are running. + * If no KVM is in use, no need to co-ordinate among threads + * as all of them will always be in host and no one is going + * to modify TB other than the opal hmi handler. + * Hence, just return from here. + */ + if (!local_paca->sibling_subcore_state) + return; + + for (i = 0; i < MAX_SUBCORE_PER_CORE; i++) + while (local_paca->sibling_subcore_state->in_guest[i]) + cpu_relax(); +} + +void wait_for_tb_resync(void) +{ + if (!local_paca->sibling_subcore_state) + return; + + while (test_bit(CORE_TB_RESYNC_REQ_BIT, + &local_paca->sibling_subcore_state->flags)) + cpu_relax(); +} -- cgit v0.10.2 From 6f00975c619064a18c23fd3aced325ae165a73b9 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sat, 20 Aug 2016 12:22:11 +0200 Subject: drm: Reject page_flip for !DRIVER_MODESET Somehow this one slipped through, which means drivers without modeset support can be oopsed (since those also don't call drm_mode_config_init, which means the crtc lookup will chase an uninitalized idr). Reported-by: Alexander Potapenko Cc: Alexander Potapenko Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Reviewed-by: Chris Wilson Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index b1dbb60..ddebe54 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -5404,6 +5404,9 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, struct drm_pending_vblank_event *e = NULL; int ret = -EINVAL; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + if (page_flip->flags & ~DRM_MODE_PAGE_FLIP_FLAGS || page_flip->reserved != 0) return -EINVAL; -- cgit v0.10.2 From 93e11eb1b745ee8dbd4f98f24b542d496c8f1e03 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 10 Aug 2016 13:44:56 +0000 Subject: dmaengine: fsl_raid: add missing of_node_put() in fsl_re_probe() When terminating for_each_compatible_node() iteration with break or return, of_node_put() should be used to prevent stale device node references from being left behind. Found by Coccinelle. Signed-off-by: Wei Yongjun Signed-off-by: Vinod Koul diff --git a/drivers/dma/fsl_raid.c b/drivers/dma/fsl_raid.c index aad167e..de2a2a2 100644 --- a/drivers/dma/fsl_raid.c +++ b/drivers/dma/fsl_raid.c @@ -836,6 +836,7 @@ static int fsl_re_probe(struct platform_device *ofdev) rc = of_property_read_u32(np, "reg", &off); if (rc) { dev_err(dev, "Reg property not found in JQ node\n"); + of_node_put(np); return -ENODEV; } /* Find out the Job Rings present under each JQ */ -- cgit v0.10.2 From 6a8b0c6b18f62a277ffb2139d0c0253fe35d7feb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 10 Aug 2016 03:17:09 +0000 Subject: dmaengine: at_xdmac: fix to pass correct device identity to free_irq() free_irq() expects the same device identity that was passed to corresponding request_irq(), otherwise the IRQ is not freed. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Wei Yongjun Acked-by: Ludovic Desroches Signed-off-by: Vinod Koul diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index e434ffe..832cbd6 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -2067,7 +2067,7 @@ err_dma_unregister: err_clk_disable: clk_disable_unprepare(atxdmac->clk); err_free_irq: - free_irq(atxdmac->irq, atxdmac->dma.dev); + free_irq(atxdmac->irq, atxdmac); return ret; } @@ -2081,7 +2081,7 @@ static int at_xdmac_remove(struct platform_device *pdev) dma_async_device_unregister(&atxdmac->dma); clk_disable_unprepare(atxdmac->clk); - free_irq(atxdmac->irq, atxdmac->dma.dev); + free_irq(atxdmac->irq, atxdmac); for (i = 0; i < atxdmac->dma.chancnt; i++) { struct at_xdmac_chan *atchan = &atxdmac->chan[i]; -- cgit v0.10.2 From 32e80820de5d7eb778632af8f235727a32d3aeb2 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Tue, 16 Aug 2016 10:44:18 +0200 Subject: dmaengine: img-mdc: fix a possible NULL dereference of_match_device could return NULL, and so cause a NULL pointer dereference later at line 850: mdma->soc = match->data; For fixing this problem, we use of_device_get_match_data(), this will simplify the code a little by using a standard function for getting the match data. This was reported by coverity (CID 1324134) Signed-off-by: LABBE Corentin Signed-off-by: Vinod Koul diff --git a/drivers/dma/img-mdc-dma.c b/drivers/dma/img-mdc-dma.c index a4c53be..624f1e1 100644 --- a/drivers/dma/img-mdc-dma.c +++ b/drivers/dma/img-mdc-dma.c @@ -861,7 +861,6 @@ static int mdc_dma_probe(struct platform_device *pdev) { struct mdc_dma *mdma; struct resource *res; - const struct of_device_id *match; unsigned int i; u32 val; int ret; @@ -871,8 +870,7 @@ static int mdc_dma_probe(struct platform_device *pdev) return -ENOMEM; platform_set_drvdata(pdev, mdma); - match = of_match_device(mdc_dma_of_match, &pdev->dev); - mdma->soc = match->data; + mdma->soc = of_device_get_match_data(&pdev->dev); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); mdma->regs = devm_ioremap_resource(&pdev->dev, res); -- cgit v0.10.2 From f6c274e11e3b31a5f95c23962b1ba593bd6a4759 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 28 Jul 2016 10:16:12 +0300 Subject: usb: dwc3: pci: runtime_resume child device During runtime_resume of dwc3-pci.c, we need to runtime suspend our child device (which is dwc3 proper) otherwise nothing will happen. Signed-off-by: Felipe Balbi diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 2eb84d6..0a32430 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -243,6 +243,13 @@ static int dwc3_pci_runtime_suspend(struct device *dev) return -EBUSY; } +static int dwc3_pci_runtime_resume(struct device *dev) +{ + struct platform_device *dwc3 = dev_get_drvdata(dev); + + return pm_runtime_get(&dwc3->dev); +} + static int dwc3_pci_pm_dummy(struct device *dev) { /* @@ -259,7 +266,7 @@ static int dwc3_pci_pm_dummy(struct device *dev) static struct dev_pm_ops dwc3_pci_dev_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(dwc3_pci_pm_dummy, dwc3_pci_pm_dummy) - SET_RUNTIME_PM_OPS(dwc3_pci_runtime_suspend, dwc3_pci_pm_dummy, + SET_RUNTIME_PM_OPS(dwc3_pci_runtime_suspend, dwc3_pci_runtime_resume, NULL) }; -- cgit v0.10.2 From b74c2d875baaa07186f617c5617f4c5e3a8a41ad Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 28 Jul 2016 13:07:07 +0300 Subject: usb: dwc3: core: allow device to runtime_suspend several times After going through runtime_suspend/runtime_resume cycle once we would be left with an unbalanced pm_runtime_get() call. Fix that by making sure that we try to suspend right after resuming so things are balanced and device can runtime_suspend again. Signed-off-by: Felipe Balbi diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 9466431..35d0924 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1192,6 +1192,7 @@ static int dwc3_runtime_resume(struct device *dev) } pm_runtime_mark_last_busy(dev); + pm_runtime_put(dev); return 0; } -- cgit v0.10.2 From 83f8da562f8b5275fa1095b45762996971f7c607 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Wed, 10 Aug 2016 08:53:34 -0500 Subject: usb: dwc2: Add reset control to dwc2 Allow for platforms that have a reset controller driver in place to bring the USB IP out of reset. Signed-off-by: Dinh Nguyen Acked-by: John Youn Tested-by: Stefan Wahren Signed-off-by: Felipe Balbi diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 9fae029..d645512 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -868,6 +868,7 @@ struct dwc2_hsotg { void *priv; int irq; struct clk *clk; + struct reset_control *reset; unsigned int queuing_high_bandwidth:1; unsigned int srp_success:1; diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index fc6f525..530959a 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -45,6 +45,7 @@ #include #include #include +#include #include @@ -337,6 +338,24 @@ static int dwc2_lowlevel_hw_init(struct dwc2_hsotg *hsotg) { int i, ret; + hsotg->reset = devm_reset_control_get_optional(hsotg->dev, "dwc2"); + if (IS_ERR(hsotg->reset)) { + ret = PTR_ERR(hsotg->reset); + switch (ret) { + case -ENOENT: + case -ENOTSUPP: + hsotg->reset = NULL; + break; + default: + dev_err(hsotg->dev, "error getting reset control %d\n", + ret); + return ret; + } + } + + if (hsotg->reset) + reset_control_deassert(hsotg->reset); + /* Set default UTMI width */ hsotg->phyif = GUSBCFG_PHYIF16; @@ -434,6 +453,9 @@ static int dwc2_driver_remove(struct platform_device *dev) if (hsotg->ll_hw_enabled) dwc2_lowlevel_hw_disable(hsotg); + if (hsotg->reset) + reset_control_assert(hsotg->reset); + return 0; } -- cgit v0.10.2 From 3295235fd70ed6d594aadee8c892a14f6a4b2d2e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 13 Aug 2016 01:28:24 +0000 Subject: usb: renesas_usbhs: gadget: fix return value check in usbhs_mod_gadget_probe() In case of error, the function usb_get_phy() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: b5a2875605ca ("usb: renesas_usbhs: Allow an OTG PHY driver to provide VBUS") Cc: # v4.3+ Acked-by: Yoshihiro Shimoda Signed-off-by: Wei Yongjun Signed-off-by: Felipe Balbi diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 92bc83b..c4c6474 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -1076,7 +1076,7 @@ int usbhs_mod_gadget_probe(struct usbhs_priv *priv) gpriv->transceiver = usb_get_phy(USB_PHY_TYPE_UNDEFINED); dev_info(dev, "%stransceiver found\n", - gpriv->transceiver ? "" : "no "); + !IS_ERR(gpriv->transceiver) ? "" : "no "); /* * CAUTION -- cgit v0.10.2 From 70237dc8efd092b93b40dc2eba812d66a5d65cb1 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 11 Aug 2016 15:51:45 +0800 Subject: usb: gadget: function: f_eem: socket buffer may be NULL In eth_start_xmit, the socket buffer may be NULL. So, add NULL pointer check at .wrap API. Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi diff --git a/drivers/usb/gadget/function/f_eem.c b/drivers/usb/gadget/function/f_eem.c index d58bfc3..8741fd7 100644 --- a/drivers/usb/gadget/function/f_eem.c +++ b/drivers/usb/gadget/function/f_eem.c @@ -341,11 +341,15 @@ static struct sk_buff *eem_wrap(struct gether *port, struct sk_buff *skb) { struct sk_buff *skb2 = NULL; struct usb_ep *in = port->in_ep; - int padlen = 0; + int headroom, tailroom, padlen = 0; u16 len = skb->len; - int headroom = skb_headroom(skb); - int tailroom = skb_tailroom(skb); + if (!skb) + return NULL; + + len = skb->len; + headroom = skb_headroom(skb); + tailroom = skb_tailroom(skb); /* When (len + EEM_HLEN + ETH_FCS_LEN) % in->maxpacket) is 0, * stick two bytes of zero-length EEM packet on the end. -- cgit v0.10.2 From 80d1642d7640ef00d8823f17a529785331aceb96 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 11 Aug 2016 15:51:46 +0800 Subject: usb: gadget: function: f_rndis: socket buffer may be NULL In eth_start_xmit, the socket buffer may be NULL. So, add NULL pointer check at .wrap API. Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c index c800582..16562e4 100644 --- a/drivers/usb/gadget/function/f_rndis.c +++ b/drivers/usb/gadget/function/f_rndis.c @@ -374,6 +374,9 @@ static struct sk_buff *rndis_add_header(struct gether *port, { struct sk_buff *skb2; + if (!skb) + return NULL; + skb2 = skb_realloc_headroom(skb, sizeof(struct rndis_packet_msg_type)); rndis_add_hdr(skb2); -- cgit v0.10.2 From f4693b08cc901912a87369c46537b94ed4084ea0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 15 Jul 2016 14:15:47 +0300 Subject: usb: gadget: fsl_qe_udc: signedness bug in qe_get_frame() We can't assign -EINVAL to a u16. Fixes: 3948f0e0c999 ('usb: add Freescale QE/CPM USB peripheral controller driver') Acked-by: Peter Chen Signed-off-by: Dan Carpenter Signed-off-by: Felipe Balbi diff --git a/drivers/usb/gadget/udc/fsl_qe_udc.c b/drivers/usb/gadget/udc/fsl_qe_udc.c index cf8819a..8bb011e 100644 --- a/drivers/usb/gadget/udc/fsl_qe_udc.c +++ b/drivers/usb/gadget/udc/fsl_qe_udc.c @@ -1878,11 +1878,8 @@ static int qe_get_frame(struct usb_gadget *gadget) tmp = in_be16(&udc->usb_param->frame_n); if (tmp & 0x8000) - tmp = tmp & 0x07ff; - else - tmp = -EINVAL; - - return (int)tmp; + return tmp & 0x07ff; + return -EINVAL; } static int fsl_qe_start(struct usb_gadget *gadget, -- cgit v0.10.2 From d6011f6fc21b4d4ab1586f01c4f62becaa0a28d7 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Tue, 16 Aug 2016 10:22:38 +0100 Subject: usb: dwc3: gadget: don't rely on jiffies while holding spinlock __dwc3_gadget_wakeup() is called while holding a spinlock, then depends on jiffies in order to timeout while polling the USB core for a link state update. In the case the wakeup failed, the timeout will never happen and will also cause the cpu to stall until rcu_preempt kicks in. This switches to a "decrement variable and wait" timeout scheme. Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Felipe Balbi diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 1f5597e..122e64d 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1433,7 +1433,7 @@ static int dwc3_gadget_get_frame(struct usb_gadget *g) static int __dwc3_gadget_wakeup(struct dwc3 *dwc) { - unsigned long timeout; + int retries; int ret; u32 reg; @@ -1484,9 +1484,9 @@ static int __dwc3_gadget_wakeup(struct dwc3 *dwc) } /* poll until Link State changes to ON */ - timeout = jiffies + msecs_to_jiffies(100); + retries = 20000; - while (!time_after(jiffies, timeout)) { + while (retries--) { reg = dwc3_readl(dwc->regs, DWC3_DSTS); /* in HS, means ON */ -- cgit v0.10.2 From 511a36d2f357724312bb3776d2f6eed3890928b2 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 30 Jun 2016 17:10:23 +0800 Subject: usb: gadget: Add the gserial port checking in gs_start_tx() When usb gadget is set gadget serial function, it will be crash in below situation. It will clean the 'port->port_usb' pointer in gserial_disconnect() function when usb link is inactive, but it will release lock for disabling the endpoints in this function. Druing the lock release period, it maybe complete one request to issue gs_write_complete()--->gs_start_tx() function, but the 'port->port_usb' pointer had been set NULL, thus it will be crash in gs_start_tx() function. This patch adds the 'port->port_usb' pointer checking in gs_start_tx() function to avoid this situation. Signed-off-by: Baolin Wang Signed-off-by: Felipe Balbi diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index 6ded634..e0cd1e4 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -375,10 +375,15 @@ __acquires(&port->port_lock) */ { struct list_head *pool = &port->write_pool; - struct usb_ep *in = port->port_usb->in; + struct usb_ep *in; int status = 0; bool do_tty_wake = false; + if (!port->port_usb) + return status; + + in = port->port_usb->in; + while (!port->write_busy && !list_empty(pool)) { struct usb_request *req; int len; -- cgit v0.10.2 From 4396f46c8c628329bd35ee4b84140b8b001a11eb Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Mon, 22 Aug 2016 16:21:52 +0800 Subject: genirq: Fix potential memleak when failing to get irq pm Obviously we should free action here if irq_chip_pm_get failed. Fixes: be45beb2df69: "genirq: Add runtime power management support for IRQ chips" Signed-off-by: Shawn Lin Cc: Jon Hunter Cc: Marc Zyngier Link: http://lkml.kernel.org/r/1471854112-13006-1-git-send-email-shawn.lin@rock-chips.com Signed-off-by: Thomas Gleixner diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 73a2b78..9530fcd 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1681,8 +1681,10 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, action->dev_id = dev_id; retval = irq_chip_pm_get(&desc->irq_data); - if (retval < 0) + if (retval < 0) { + kfree(action); return retval; + } chip_bus_lock(desc); retval = __setup_irq(irq, desc, action); @@ -1985,8 +1987,10 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler, action->percpu_dev_id = dev_id; retval = irq_chip_pm_get(&desc->irq_data); - if (retval < 0) + if (retval < 0) { + kfree(action); return retval; + } chip_bus_lock(desc); retval = __setup_irq(irq, desc, action); -- cgit v0.10.2 From 3ee0ce2a54dff07d09440723594df89bc1a12e79 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Aug 2016 07:06:45 -0700 Subject: genirq/affinity: Use get/put_online_cpus around cpumask operations Without locking out CPU mask operations we might end up with an inconsistent view of the cpumask in the function. Fixes: 5e385a6ef31f: "genirq: Add a helper to spread an affinity mask for MSI/MSI-X vectors" Signed-off-by: Christoph Hellwig Link: http://lkml.kernel.org/r/1470924405-25728-1-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index f689593..32f6cfc 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -39,6 +39,7 @@ struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs) return NULL; } + get_online_cpus(); if (max_vecs >= num_online_cpus()) { cpumask_copy(affinity_mask, cpu_online_mask); *nr_vecs = num_online_cpus(); @@ -56,6 +57,7 @@ struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs) } *nr_vecs = vecs; } + put_online_cpus(); return affinity_mask; } -- cgit v0.10.2 From 7e4379eae0e31994ea645db1d13006ea8e5ce539 Mon Sep 17 00:00:00 2001 From: Andrej Krutak Date: Thu, 18 Aug 2016 23:52:10 +0200 Subject: ALSA: line6: Remove double line6_pcm_release() after failed acquire. If there's an error, pcm is released in line6_pcm_acquire already. Fixes: 247d95ee6dd2 ('ALSA: line6: Handle error from line6_pcm_acquire()') Cc: # v4.0+ Reviewed-by: Stefan Hajnoczi Signed-off-by: Andrej Krutak Signed-off-by: Takashi Iwai diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c index 204cc07..2bc8879 100644 --- a/sound/usb/line6/pcm.c +++ b/sound/usb/line6/pcm.c @@ -55,7 +55,6 @@ static int snd_line6_impulse_volume_put(struct snd_kcontrol *kcontrol, err = line6_pcm_acquire(line6pcm, LINE6_STREAM_IMPULSE); if (err < 0) { line6pcm->impulse_volume = 0; - line6_pcm_release(line6pcm, LINE6_STREAM_IMPULSE); return err; } } else { -- cgit v0.10.2 From adc8a43a6d6688272ebffa81789fa857e603dec6 Mon Sep 17 00:00:00 2001 From: Andrej Krutak Date: Thu, 18 Aug 2016 23:52:11 +0200 Subject: ALSA: line6: Give up on the lock while URBs are released. Done, because line6_stream_stop() locks and calls line6_unlink_audio_urbs(), which in turn invokes audio_out_callback(), which tries to lock 2nd time. Fixes: ============================================= [ INFO: possible recursive locking detected ] 4.4.15+ #15 Not tainted --------------------------------------------- mplayer/3591 is trying to acquire lock: (&(&line6pcm->out.lock)->rlock){-.-...}, at: [] audio_out_callback+0x70/0x110 [snd_usb_line6] but task is already holding lock: (&(&line6pcm->out.lock)->rlock){-.-...}, at: [] line6_stream_stop+0x24/0x5c [snd_usb_line6] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&line6pcm->out.lock)->rlock); lock(&(&line6pcm->out.lock)->rlock); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by mplayer/3591: #0: (snd_pcm_link_rwlock){.-.-..}, at: [] snd_pcm_stream_lock+0x1e/0x40 [snd_pcm] #1: (&(&substream->self_group.lock)->rlock){-.-...}, at: [] snd_pcm_stream_lock+0x26/0x40 [snd_pcm] #2: (&(&line6pcm->out.lock)->rlock){-.-...}, at: [] line6_stream_stop+0x24/0x5c [snd_usb_line6] stack backtrace: CPU: 0 PID: 3591 Comm: mplayer Not tainted 4.4.15+ #15 Hardware name: Generic AM33XX (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x11/0x14) [] (show_stack) from [] (dump_stack+0x8b/0xac) [] (dump_stack) from [] (__lock_acquire+0xc8b/0x1780) [] (__lock_acquire) from [] (lock_acquire+0x99/0x1c0) [] (lock_acquire) from [] (_raw_spin_lock_irqsave+0x3f/0x4c) [] (_raw_spin_lock_irqsave) from [] (audio_out_callback+0x70/0x110 [snd_usb_line6]) [] (audio_out_callback [snd_usb_line6]) from [] (__usb_hcd_giveback_urb+0x53/0xd0) [] (__usb_hcd_giveback_urb) from [] (musb_giveback+0x3d/0x98) [] (musb_giveback) from [] (musb_urb_dequeue+0x6d/0x114) [] (musb_urb_dequeue) from [] (usb_hcd_unlink_urb+0x39/0x98) [] (usb_hcd_unlink_urb) from [] (line6_unlink_audio_urbs+0x6a/0x6c [snd_usb_line6]) [] (line6_unlink_audio_urbs [snd_usb_line6]) from [] (line6_stream_stop+0x42/0x5c [snd_usb_line6]) [] (line6_stream_stop [snd_usb_line6]) from [] (snd_line6_trigger+0xb6/0xf4 [snd_usb_line6]) [] (snd_line6_trigger [snd_usb_line6]) from [] (snd_pcm_do_stop+0x36/0x38 [snd_pcm]) [] (snd_pcm_do_stop [snd_pcm]) from [] (snd_pcm_action_single+0x22/0x40 [snd_pcm]) [] (snd_pcm_action_single [snd_pcm]) from [] (snd_pcm_action+0xac/0xb0 [snd_pcm]) [] (snd_pcm_action [snd_pcm]) from [] (snd_pcm_drop+0x38/0x64 [snd_pcm]) [] (snd_pcm_drop [snd_pcm]) from [] (snd_pcm_common_ioctl1+0x7fe/0xbe8 [snd_pcm]) [] (snd_pcm_common_ioctl1 [snd_pcm]) from [] (snd_pcm_playback_ioctl1+0x15c/0x51c [snd_pcm]) [] (snd_pcm_playback_ioctl1 [snd_pcm]) from [] (snd_pcm_playback_ioctl+0x20/0x28 [snd_pcm]) [] (snd_pcm_playback_ioctl [snd_pcm]) from [] (do_vfs_ioctl+0x3af/0x5c8) Fixes: 63e20df1e5b2 ('ALSA: line6: Reorganize PCM stream handling') Cc: # v4.0+ Reviewed-by: Stefan Hajnoczi Signed-off-by: Andrej Krutak Signed-off-by: Takashi Iwai diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c index 2bc8879..41aa335 100644 --- a/sound/usb/line6/pcm.c +++ b/sound/usb/line6/pcm.c @@ -210,7 +210,9 @@ static void line6_stream_stop(struct snd_line6_pcm *line6pcm, int direction, spin_lock_irqsave(&pstr->lock, flags); clear_bit(type, &pstr->running); if (!pstr->running) { + spin_unlock_irqrestore(&pstr->lock, flags); line6_unlink_audio_urbs(line6pcm, pstr); + spin_lock_irqsave(&pstr->lock, flags); if (direction == SNDRV_PCM_STREAM_CAPTURE) { line6pcm->prev_fbuf = NULL; line6pcm->prev_fsize = 0; -- cgit v0.10.2 From b027d11263836a0cd335520175257dcb99b43757 Mon Sep 17 00:00:00 2001 From: Andrej Krutak Date: Thu, 18 Aug 2016 23:52:12 +0200 Subject: ALSA: line6: Fix POD sysfs attributes segfault The commit 02fc76f6a changed base of the sysfs attributes from device to card. The "show" callbacks dereferenced wrong objects because of this. Fixes: 02fc76f6a7db ('ALSA: line6: Create sysfs via snd_card_add_dev_attr()') Cc: # v4.0+ Reviewed-by: Stefan Hajnoczi Signed-off-by: Andrej Krutak Signed-off-by: Takashi Iwai diff --git a/sound/usb/line6/pod.c b/sound/usb/line6/pod.c index daf81d1..45dd348 100644 --- a/sound/usb/line6/pod.c +++ b/sound/usb/line6/pod.c @@ -244,8 +244,8 @@ static int pod_set_system_param_int(struct usb_line6_pod *pod, int value, static ssize_t serial_number_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct usb_interface *interface = to_usb_interface(dev); - struct usb_line6_pod *pod = usb_get_intfdata(interface); + struct snd_card *card = dev_to_snd_card(dev); + struct usb_line6_pod *pod = card->private_data; return sprintf(buf, "%u\n", pod->serial_number); } @@ -256,8 +256,8 @@ static ssize_t serial_number_show(struct device *dev, static ssize_t firmware_version_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct usb_interface *interface = to_usb_interface(dev); - struct usb_line6_pod *pod = usb_get_intfdata(interface); + struct snd_card *card = dev_to_snd_card(dev); + struct usb_line6_pod *pod = card->private_data; return sprintf(buf, "%d.%02d\n", pod->firmware_version / 100, pod->firmware_version % 100); @@ -269,8 +269,8 @@ static ssize_t firmware_version_show(struct device *dev, static ssize_t device_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct usb_interface *interface = to_usb_interface(dev); - struct usb_line6_pod *pod = usb_get_intfdata(interface); + struct snd_card *card = dev_to_snd_card(dev); + struct usb_line6_pod *pod = card->private_data; return sprintf(buf, "%d\n", pod->device_id); } -- cgit v0.10.2 From f4750a46a0dee58f7a65b438b28a092669b609aa Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 17 Jun 2016 13:42:18 -0700 Subject: drm/i915/gen9: Initialize intel_state->active_crtcs during WM sanitization (v2) intel_state->active_crtcs is usually only initialized when doing a modeset. During our first atomic commit after boot, we're effectively faking a modeset to sanitize the DDB/wm setup, so ensure that this field gets initialized before use. v2: - Don't clobber active_crtcs if our first commit really is a modeset (Maarten) - Grab connection_mutex when faking a modeset during sanitization (Maarten) Reported-by: Tvrtko Ursulin Cc: Tvrtko Ursulin Cc: Maarten Lankhorst Signed-off-by: Matt Roper Tested-by: Tvrtko Ursulin Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1466196140-16336-2-git-send-email-matthew.d.roper@intel.com Cc: stable@vger.kernel.org #v4.7+ (cherry picked from commit 1b54a880b250acc226b13cea221b90aa1b3e37dd) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d5deb58..e9763a8 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3912,9 +3912,24 @@ skl_compute_ddb(struct drm_atomic_state *state) * pretend that all pipes switched active status so that we'll * ensure a full DDB recompute. */ - if (dev_priv->wm.distrust_bios_wm) + if (dev_priv->wm.distrust_bios_wm) { + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, + state->acquire_ctx); + if (ret) + return ret; + intel_state->active_pipe_changes = ~0; + /* + * We usually only initialize intel_state->active_crtcs if we + * we're doing a modeset; make sure this field is always + * initialized during the sanitization process that happens + * on the first commit too. + */ + if (!intel_state->modeset) + intel_state->active_crtcs = dev_priv->active_crtcs; + } + /* * If the modeset changes which CRTC's are active, we need to * recompute the DDB allocation for *all* active pipes, even -- cgit v0.10.2 From c7aca235aa60d1432c95b752812d359d0dbece4f Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 17 Jun 2016 13:42:20 -0700 Subject: drm/i915/gen9: Drop invalid WARN() during data rate calculation It's possible to have a non-zero plane mask and still wind up with a total data rate of zero. There are two cases where this can happen: * planes are active (from the KMS point of view), but are all fully clipped (positioned offscreen) * the only active plane on a CRTC is the cursor (which is handled independently and not counted into the general data rate computations These are both valid display setups (although unusual), so we need to drop the WARN(). Signed-off-by: Matt Roper Reviewed-by: Maarten Lankhorst Testcase: kms_universal_planes.cursor-only-pipe-* Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1466196140-16336-4-git-send-email-matthew.d.roper@intel.com Cc: stable@vger.kernel.org #v4.7+ (cherry picked from commit 43aa7e87507f519b0b2497b6fac1e894554eaef2) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e9763a8..421c5b7 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3107,8 +3107,6 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate) total_data_rate += intel_cstate->wm.skl.plane_y_data_rate[id]; } - WARN_ON(cstate->plane_mask && total_data_rate == 0); - return total_data_rate; } -- cgit v0.10.2 From dcd79934b0dd803fdb29216fbd6f4a899a66f466 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 18 Aug 2016 17:16:40 +0100 Subject: drm/i915: Unconditionally flush any chipset buffers before execbuf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If userspace is asynchronously streaming into the batch or other execobjects, we may not flush those writes along with a change in cache domain (as there is no change). Therefore those writes may end up in internal chipset buffers and not visible to the GPU upon execution. We must issue a flush command or otherwise we encounter incoherency in the batchbuffers and the GPU executing invalid commands (i.e. hanging) quite regularly. v2: Throw a paranoid wmb() into the general flush so that we remain consistent with before. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90841 Fixes: 1816f9236303 ("drm/i915: Support creation of unbound wc user...") Signed-off-by: Chris Wilson Cc: Akash Goel Cc: Daniel Vetter Cc: Tvrtko Ursulin Tested-by: Matti Hämäläinen Cc: stable@vger.kernel.org Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20160818161718.27187-1-chris@chris-wilson.co.uk (cherry picked from commit 600f436801deae65e48404847b61c89b4944e355) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 20fe9d5..3ca674e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3591,6 +3591,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); /* belongs in i915_gem_gtt.h */ static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv) { + wmb(); if (INTEL_GEN(dev_priv) < 6) intel_gtt_chipset_flush(); } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1978633..b35e5b6 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -943,8 +943,6 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, { const unsigned other_rings = ~intel_engine_flag(req->engine); struct i915_vma *vma; - uint32_t flush_domains = 0; - bool flush_chipset = false; int ret; list_for_each_entry(vma, vmas, exec_list) { @@ -957,16 +955,11 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, } if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - flush_chipset |= i915_gem_clflush_object(obj, false); - - flush_domains |= obj->base.write_domain; + i915_gem_clflush_object(obj, false); } - if (flush_chipset) - i915_gem_chipset_flush(req->engine->i915); - - if (flush_domains & I915_GEM_DOMAIN_GTT) - wmb(); + /* Unconditionally flush any chipset caches (for streaming writes). */ + i915_gem_chipset_flush(req->engine->i915); /* Unconditionally invalidate gpu caches and ensure that we do flush * any residual writes from the previous batch. -- cgit v0.10.2 From 0184c2fff11705d0041c61356c57eac2e033c686 Mon Sep 17 00:00:00 2001 From: Dave Gordon Date: Fri, 19 Aug 2016 15:23:42 +0100 Subject: drm/i915: Reattach comment, complete type specification In the recent patch bc3d674 drm/i915: Allow userspace to request no-error-capture upon ... the final version moved the flags and the associated #defines around so they were adjacent; unfortunately, they ended up between a comment and the thing (hw_id) to which the comment applies :( So this patch reshuffles the comment and subject back together. Also, as we're touching 'hw_id', let's change it from just 'unsigned' to a fully-specified 'unsigned int', because some code checking tools (including checkpatch) object to plain 'unsigned'. Fixes: bc3d674462e5 ("drm/i915: Allow userspace to request no-error-capture...") Signed-off-by: Dave Gordon Cc: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1471616622-6919-1-git-send-email-david.s.gordon@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson (cherry picked from commit 0be81156b3fb4d4e8e2c94177e5222dc21c3ff10) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3ca674e..46e372e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -882,11 +882,12 @@ struct i915_gem_context { struct i915_ctx_hang_stats hang_stats; - /* Unique identifier for this context, used by the hw for tracking */ unsigned long flags; #define CONTEXT_NO_ZEROMAP BIT(0) #define CONTEXT_NO_ERROR_CAPTURE BIT(1) - unsigned hw_id; + + /* Unique identifier for this context, used by the hw for tracking */ + unsigned int hw_id; u32 user_handle; u32 ggtt_alignment; -- cgit v0.10.2 From 5bc6abe7674d9cf41dbcdaaf98a19184da181439 Mon Sep 17 00:00:00 2001 From: Lyude Date: Wed, 17 Aug 2016 15:55:53 -0400 Subject: drm/i915/gen6+: Interpret mailbox error flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to add proper support for the SAGV, we need to be able to know what the cause of a failure to change the SAGV through the pcode mailbox was. The reasoning for this is that some very early pre-release Skylake machines don't actually allow you to control the SAGV on them, and indicate an invalid mailbox command was sent. This also might come in handy in the future for debugging. Changes since v1: - Add functions for interpreting gen6 mailbox error codes along with gen7+ error codes, and actually interpret those codes properly - Renamed patch to reflect new behavior Signed-off-by: Lyude Cc: Matt Roper Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Ville Syrjälä Cc: stable@vger.kernel.org Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1471463761-26796-2-git-send-email-cpaul@redhat.com [mlankhorst: -ENOSYS -> -ENXIO for checkpatch] (cherry picked from commit 87660502f1a4d51fb043e89a45d30c9917787c22) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5c06413..2b9508d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7145,6 +7145,15 @@ enum { #define GEN6_PCODE_MAILBOX _MMIO(0x138124) #define GEN6_PCODE_READY (1<<31) +#define GEN6_PCODE_ERROR_MASK 0xFF +#define GEN6_PCODE_SUCCESS 0x0 +#define GEN6_PCODE_ILLEGAL_CMD 0x1 +#define GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x2 +#define GEN6_PCODE_TIMEOUT 0x3 +#define GEN6_PCODE_UNIMPLEMENTED_CMD 0xFF +#define GEN7_PCODE_TIMEOUT 0x2 +#define GEN7_PCODE_ILLEGAL_DATA 0x3 +#define GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x10 #define GEN6_PCODE_WRITE_RC6VIDS 0x4 #define GEN6_PCODE_READ_RC6VIDS 0x5 #define GEN6_ENCODE_RC6_VID(mv) (((mv) - 245) / 5) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 421c5b7..2c88c2f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7671,8 +7671,53 @@ void intel_init_pm(struct drm_device *dev) } } +static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) +{ + uint32_t flags = + I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; + + switch (flags) { + case GEN6_PCODE_SUCCESS: + return 0; + case GEN6_PCODE_UNIMPLEMENTED_CMD: + case GEN6_PCODE_ILLEGAL_CMD: + return -ENXIO; + case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: + return -EOVERFLOW; + case GEN6_PCODE_TIMEOUT: + return -ETIMEDOUT; + default: + MISSING_CASE(flags) + return 0; + } +} + +static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv) +{ + uint32_t flags = + I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; + + switch (flags) { + case GEN6_PCODE_SUCCESS: + return 0; + case GEN6_PCODE_ILLEGAL_CMD: + return -ENXIO; + case GEN7_PCODE_TIMEOUT: + return -ETIMEDOUT; + case GEN7_PCODE_ILLEGAL_DATA: + return -EINVAL; + case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: + return -EOVERFLOW; + default: + MISSING_CASE(flags); + return 0; + } +} + int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) { + int status; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); /* GEN6_PCODE_* are outside of the forcewake domain, we can @@ -7699,12 +7744,25 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val *val = I915_READ_FW(GEN6_PCODE_DATA); I915_WRITE_FW(GEN6_PCODE_DATA, 0); + if (INTEL_GEN(dev_priv) > 6) + status = gen7_check_mailbox_status(dev_priv); + else + status = gen6_check_mailbox_status(dev_priv); + + if (status) { + DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed: %d\n", + status); + return status; + } + return 0; } int sandybridge_pcode_write(struct drm_i915_private *dev_priv, - u32 mbox, u32 val) + u32 mbox, u32 val) { + int status; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); /* GEN6_PCODE_* are outside of the forcewake domain, we can @@ -7729,6 +7787,17 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, I915_WRITE_FW(GEN6_PCODE_DATA, 0); + if (INTEL_GEN(dev_priv) > 6) + status = gen7_check_mailbox_status(dev_priv); + else + status = gen6_check_mailbox_status(dev_priv); + + if (status) { + DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed: %d\n", + status); + return status; + } + return 0; } -- cgit v0.10.2 From f403372658fc7b652a77885a4141e58e57d9c75a Mon Sep 17 00:00:00 2001 From: Lyude Date: Wed, 17 Aug 2016 15:55:54 -0400 Subject: drm/i915/skl: Add support for the SAGV, fix underrun hangs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the watermark calculations for Skylake are still broken, we're apt to hitting underruns very easily under multi-monitor configurations. While it would be lovely if this was fixed, it's not. Another problem that's been coming from this however, is the mysterious issue of underruns causing full system hangs. An easy way to reproduce this with a skylake system: - Get a laptop with a skylake GPU, and hook up two external monitors to it - Move the cursor from the built-in LCD to one of the external displays as quickly as you can - You'll get a few pipe underruns, and eventually the entire system will just freeze. After doing a lot of investigation and reading through the bspec, I found the existence of the SAGV, which is responsible for adjusting the system agent voltage and clock frequencies depending on how much power we need. According to the bspec: "The display engine access to system memory is blocked during the adjustment time. SAGV defaults to enabled. Software must use the GT-driver pcode mailbox to disable SAGV when the display engine is not able to tolerate the blocking time." The rest of the bspec goes on to explain that software can simply leave the SAGV enabled, and disable it when we use interlaced pipes/have more then one pipe active. Sure enough, with this patchset the system hangs resulting from pipe underruns on Skylake have completely vanished on my T460s. Additionally, the bspec mentions turning off the SAGV with more then one pipe enabled as a workaround for display underruns. While this patch doesn't entirely fix that, it looks like it does improve the situation a little bit so it's likely this is going to be required to make watermarks on Skylake fully functional. This will still need additional work in the future: we shouldn't be enabling the SAGV if any of the currently enabled planes can't enable WM levels that introduce latencies >= 30 µs. Changes since v11: - Add skl_can_enable_sagv() - Make sure we don't enable SAGV when not all planes can enable watermarks >= the SAGV engine block time. I was originally going to save this for later, but I recently managed to run into a machine that was having problems with a single pipe configuration + SAGV. - Make comparisons to I915_SKL_SAGV_NOT_CONTROLLED explicit - Change I915_SAGV_DYNAMIC_FREQ to I915_SAGV_ENABLE - Move printks outside of mutexes - Don't print error messages twice Changes since v10: - Apparently sandybridge_pcode_read actually writes values and reads them back, despite it's misleading function name. This means we've been doing this mostly wrong and have been writing garbage to the SAGV control. Because of this, we no longer attempt to read the SAGV status during initialization (since there are no helpers for this). - mlankhorst noticed that this patch was breaking on some very early pre-release Skylake machines, which apparently don't allow you to disable the SAGV. To prevent machines from failing tests due to SAGV errors, if the first time we try to control the SAGV results in the mailbox indicating an invalid command, we just disable future attempts to control the SAGV state by setting dev_priv->skl_sagv_status to I915_SKL_SAGV_NOT_CONTROLLED and make a note of it in dmesg. - Move mutex_unlock() a little higher in skl_enable_sagv(). This doesn't actually fix anything, but lets us release the lock a little sooner since we're finished with it. Changes since v9: - Only enable/disable sagv on Skylake Changes since v8: - Add intel_state->modeset guard to the conditional for skl_enable_sagv() Changes since v7: - Remove GEN9_SAGV_LOW_FREQ, replace with GEN9_SAGV_IS_ENABLED (that's all we use it for anyway) - Use GEN9_SAGV_IS_ENABLED instead of 0x1 for clarification - Fix a styling error that snuck past me Changes since v6: - Protect skl_enable_sagv() with intel_state->modeset conditional in intel_atomic_commit_tail() Changes since v5: - Don't use is_power_of_2. Makes things confusing - Don't use the old state to figure out whether or not to enable/disable the sagv, use the new one - Split the loop in skl_disable_sagv into it's own function - Move skl_sagv_enable/disable() calls into intel_atomic_commit_tail() Changes since v4: - Use is_power_of_2 against active_crtcs to check whether we have > 1 pipe enabled - Fix skl_sagv_get_hw_state(): (temp & 0x1) indicates disabled, 0x0 enabled - Call skl_sagv_enable/disable() from pre/post-plane updates Changes since v3: - Use time_before() to compare timeout to jiffies Changes since v2: - Really apply minor style nitpicks to patch this time Changes since v1: - Added comments about this probably being one of the requirements to fixing Skylake's watermark issues - Minor style nitpicks from Matt Roper - Disable these functions on Broxton, since it doesn't have an SAGV Signed-off-by: Lyude Cc: Matt Roper Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Ville Syrjälä Cc: stable@vger.kernel.org Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1471463761-26796-3-git-send-email-cpaul@redhat.com [mlankhorst: ENOSYS -> ENXIO, whitespace fixes] (cherry picked from commit 656d1b89e5ffb83036ab0e2a24be7558f34365c7) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 46e372e..f68c789 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1964,6 +1964,13 @@ struct drm_i915_private { struct i915_suspend_saved_registers regfile; struct vlv_s0ix_state vlv_s0ix_state; + enum { + I915_SKL_SAGV_UNKNOWN = 0, + I915_SKL_SAGV_DISABLED, + I915_SKL_SAGV_ENABLED, + I915_SKL_SAGV_NOT_CONTROLLED + } skl_sagv_status; + struct { /* * Raw watermark latency values: diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2b9508d..bf2cad3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7175,6 +7175,10 @@ enum { #define HSW_PCODE_DE_WRITE_FREQ_REQ 0x17 #define DISPLAY_IPS_CONTROL 0x19 #define HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL 0x1A +#define GEN9_PCODE_SAGV_CONTROL 0x21 +#define GEN9_SAGV_DISABLE 0x0 +#define GEN9_SAGV_IS_DISABLED 0x1 +#define GEN9_SAGV_ENABLE 0x3 #define GEN6_PCODE_DATA _MMIO(0x138128) #define GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8 #define GEN6_PCODE_FREQ_RING_RATIO_SHIFT 16 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2a751b6..175595f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13759,6 +13759,13 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco)) dev_priv->display.modeset_commit_cdclk(state); + /* + * SKL workaround: bspec recommends we disable the SAGV when we + * have more then one pipe enabled + */ + if (IS_SKYLAKE(dev_priv) && !skl_can_enable_sagv(state)) + skl_disable_sagv(dev_priv); + intel_modeset_verify_disabled(dev); } @@ -13832,6 +13839,10 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_modeset_verify_crtc(crtc, old_crtc_state, crtc->state); } + if (IS_SKYLAKE(dev_priv) && intel_state->modeset && + skl_can_enable_sagv(state)) + skl_enable_sagv(dev_priv); + drm_atomic_helper_commit_hw_done(state); if (intel_state->modeset) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index cc937a1..ff399b9 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1716,6 +1716,9 @@ void ilk_wm_get_hw_state(struct drm_device *dev); void skl_wm_get_hw_state(struct drm_device *dev); void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, struct skl_ddb_allocation *ddb /* out */); +bool skl_can_enable_sagv(struct drm_atomic_state *state); +int skl_enable_sagv(struct drm_i915_private *dev_priv); +int skl_disable_sagv(struct drm_i915_private *dev_priv); uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config); bool ilk_disable_lp_wm(struct drm_device *dev); int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2c88c2f..496a911 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2852,6 +2852,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev) #define SKL_DDB_SIZE 896 /* in blocks */ #define BXT_DDB_SIZE 512 +#define SKL_SAGV_BLOCK_TIME 30 /* µs */ /* * Return the index of a plane in the SKL DDB and wm result arrays. Primary @@ -2875,6 +2876,153 @@ skl_wm_plane_id(const struct intel_plane *plane) } } +/* + * SAGV dynamically adjusts the system agent voltage and clock frequencies + * depending on power and performance requirements. The display engine access + * to system memory is blocked during the adjustment time. Because of the + * blocking time, having this enabled can cause full system hangs and/or pipe + * underruns if we don't meet all of the following requirements: + * + * - <= 1 pipe enabled + * - All planes can enable watermarks for latencies >= SAGV engine block time + * - We're not using an interlaced display configuration + */ +int +skl_enable_sagv(struct drm_i915_private *dev_priv) +{ + int ret; + + if (dev_priv->skl_sagv_status == I915_SKL_SAGV_NOT_CONTROLLED || + dev_priv->skl_sagv_status == I915_SKL_SAGV_ENABLED) + return 0; + + DRM_DEBUG_KMS("Enabling the SAGV\n"); + mutex_lock(&dev_priv->rps.hw_lock); + + ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, + GEN9_SAGV_ENABLE); + + /* We don't need to wait for the SAGV when enabling */ + mutex_unlock(&dev_priv->rps.hw_lock); + + /* + * Some skl systems, pre-release machines in particular, + * don't actually have an SAGV. + */ + if (ret == -ENXIO) { + DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); + dev_priv->skl_sagv_status = I915_SKL_SAGV_NOT_CONTROLLED; + return 0; + } else if (ret < 0) { + DRM_ERROR("Failed to enable the SAGV\n"); + return ret; + } + + dev_priv->skl_sagv_status = I915_SKL_SAGV_ENABLED; + return 0; +} + +static int +skl_do_sagv_disable(struct drm_i915_private *dev_priv) +{ + int ret; + uint32_t temp = GEN9_SAGV_DISABLE; + + ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_SAGV_CONTROL, + &temp); + if (ret) + return ret; + else + return temp & GEN9_SAGV_IS_DISABLED; +} + +int +skl_disable_sagv(struct drm_i915_private *dev_priv) +{ + int ret, result; + + if (dev_priv->skl_sagv_status == I915_SKL_SAGV_NOT_CONTROLLED || + dev_priv->skl_sagv_status == I915_SKL_SAGV_DISABLED) + return 0; + + DRM_DEBUG_KMS("Disabling the SAGV\n"); + mutex_lock(&dev_priv->rps.hw_lock); + + /* bspec says to keep retrying for at least 1 ms */ + ret = wait_for(result = skl_do_sagv_disable(dev_priv), 1); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret == -ETIMEDOUT) { + DRM_ERROR("Request to disable SAGV timed out\n"); + return -ETIMEDOUT; + } + + /* + * Some skl systems, pre-release machines in particular, + * don't actually have an SAGV. + */ + if (result == -ENXIO) { + DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); + dev_priv->skl_sagv_status = I915_SKL_SAGV_NOT_CONTROLLED; + return 0; + } else if (result < 0) { + DRM_ERROR("Failed to disable the SAGV\n"); + return result; + } + + dev_priv->skl_sagv_status = I915_SKL_SAGV_DISABLED; + return 0; +} + +bool skl_can_enable_sagv(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_crtc *crtc; + enum pipe pipe; + int level, plane; + + /* + * SKL workaround: bspec recommends we disable the SAGV when we have + * more then one pipe enabled + * + * If there are no active CRTCs, no additional checks need be performed + */ + if (hweight32(intel_state->active_crtcs) == 0) + return true; + else if (hweight32(intel_state->active_crtcs) > 1) + return false; + + /* Since we're now guaranteed to only have one active CRTC... */ + pipe = ffs(intel_state->active_crtcs) - 1; + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + + if (crtc->state->mode.flags & DRM_MODE_FLAG_INTERLACE) + return false; + + for_each_plane(dev_priv, pipe, plane) { + /* Skip this plane if it's not enabled */ + if (intel_state->wm_results.plane[pipe][plane][0] == 0) + continue; + + /* Find the highest enabled wm level for this plane */ + for (level = ilk_wm_max_level(dev); + intel_state->wm_results.plane[pipe][plane][level] == 0; --level) + { } + + /* + * If any of the planes on this pipe don't enable wm levels + * that incur memory latencies higher then 30µs we can't enable + * the SAGV + */ + if (dev_priv->wm.skl_latency[level] < SKL_SAGV_BLOCK_TIME) + return false; + } + + return true; +} + static void skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, const struct intel_crtc_state *cstate, -- cgit v0.10.2 From 9909113cc48a7ce6e772573e3cc82a3f03ffa8ef Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 17 Aug 2016 15:55:55 -0400 Subject: drm/i915/gen9: Only copy WM results for changed pipes to skl_hw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we write watermark values to the hardware, those values are stored in dev_priv->wm.skl_hw. However with recent watermark changes, the results structure we're copying from only contains valid watermark and DDB values for the pipes that are actually changing; the values for other pipes remain 0. Thus a blind copy of the entire skl_wm_values structure will clobber the values for unchanged pipes...we need to be more selective and only copy over the values for the changing pipes. This mistake was hidden until recently due to another bug that caused us to erroneously re-calculate watermarks for all active pipes rather than changing pipes. Only when that bug was fixed was the impact of this bug discovered (e.g., modesets failing with "Requested display configuration exceeds system watermark limitations" messages and leaving watermarks non-functional, even ones initiated by intel_fbdev_restore_mode). Changes since v1: - Add a function for copying a pipe's wm values (skl_copy_wm_for_pipe()) so we can reuse this later Fixes: 734fa01f3a17 ("drm/i915/gen9: Calculate watermarks during atomic 'check' (v2)") Fixes: 9b6130227495 ("drm/i915/gen9: Re-allocate DDB only for changed pipes") Signed-off-by: Matt Roper Signed-off-by: Lyude Reviewed-by: Matt Roper Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Cc: Ville Syrjälä Cc: Daniel Vetter Cc: Radhakrishna Sripada Cc: Hans de Goede Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1471463761-26796-4-git-send-email-cpaul@redhat.com (cherry picked from commit 2722efb90b3420dee54b4cb3cdc7917efacc2dce) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 496a911..70dcf8d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4109,6 +4109,24 @@ skl_compute_ddb(struct drm_atomic_state *state) return 0; } +static void +skl_copy_wm_for_pipe(struct skl_wm_values *dst, + struct skl_wm_values *src, + enum pipe pipe) +{ + dst->wm_linetime[pipe] = src->wm_linetime[pipe]; + memcpy(dst->plane[pipe], src->plane[pipe], + sizeof(dst->plane[pipe])); + memcpy(dst->plane_trans[pipe], src->plane_trans[pipe], + sizeof(dst->plane_trans[pipe])); + + dst->ddb.pipe[pipe] = src->ddb.pipe[pipe]; + memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe], + sizeof(dst->ddb.y_plane[pipe])); + memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe], + sizeof(dst->ddb.plane[pipe])); +} + static int skl_compute_wm(struct drm_atomic_state *state) { @@ -4181,8 +4199,10 @@ static void skl_update_wm(struct drm_crtc *crtc) struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct skl_wm_values *results = &dev_priv->wm.skl_results; + struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw; struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal; + int pipe; if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0) return; @@ -4194,8 +4214,12 @@ static void skl_update_wm(struct drm_crtc *crtc) skl_write_wm_values(dev_priv, results); skl_flush_wm_values(dev_priv, results); - /* store the new configuration */ - dev_priv->wm.skl_hw = *results; + /* + * Store the new configuration (but only for the pipes that have + * changed; the other values weren't recomputed). + */ + for_each_pipe_masked(dev_priv, pipe, results->dirty_pipes) + skl_copy_wm_for_pipe(hw_vals, results, pipe); mutex_unlock(&dev_priv->wm.wm_mutex); } -- cgit v0.10.2 From 762c60ab0257d25eea8db3e3fec85ed53b5330fe Mon Sep 17 00:00:00 2001 From: Lyude Date: Wed, 17 Aug 2016 15:55:57 -0400 Subject: drm/i915/skl: Ensure pipes with changed wms get added to the state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we're enabling a pipe, we'll need to modify the watermarks on all active planes. Since those planes won't be added to the state on their own, we need to add them ourselves. Signed-off-by: Lyude Reviewed-by: Matt Roper Cc: stable@vger.kernel.org Cc: Ville Syrjälä Cc: Daniel Vetter Cc: Radhakrishna Sripada Cc: Hans de Goede Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1471463761-26796-6-git-send-email-cpaul@redhat.com (cherry picked from commit 05a76d3d6ad1ee9f9814f88949cc9305fc165460) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 70dcf8d..53e13c1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4104,6 +4104,10 @@ skl_compute_ddb(struct drm_atomic_state *state) ret = skl_allocate_pipe_ddb(cstate, ddb); if (ret) return ret; + + ret = drm_atomic_add_affected_planes(state, &intel_crtc->base); + if (ret) + return ret; } return 0; -- cgit v0.10.2 From 177d91aaea4bcafb29232336bafaa521b85286aa Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 15 Aug 2016 15:09:27 +0200 Subject: drm/i915: Fix botched merge that downgrades CSR versions. Merge commit 5e580523d9128a4d8 reverts the version bumping parts of commit 4aa7fb9c3c4fa0. Bump the versions again and request the specific firmware version. The currently recommended versions are: SKL 1.26, KBL 1.01 and BXT 1.07. Cc: Patrik Jakobsson Cc: Imre Deak Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97242 Cc: drm-intel-fixes@lists.freedesktop.org Fixes: 5e580523d912 ("Backmerge tag 'v4.7' into drm-next") Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1471266567-22443-1-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit 536ab3ca19ef856e84389a155c5832c68559a28a) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 3edb958..c3b33a1 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -41,15 +41,15 @@ * be moved to FW_FAILED. */ -#define I915_CSR_KBL "i915/kbl_dmc_ver1.bin" +#define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" MODULE_FIRMWARE(I915_CSR_KBL); #define KBL_CSR_VERSION_REQUIRED CSR_VERSION(1, 1) -#define I915_CSR_SKL "i915/skl_dmc_ver1.bin" +#define I915_CSR_SKL "i915/skl_dmc_ver1_26.bin" MODULE_FIRMWARE(I915_CSR_SKL); -#define SKL_CSR_VERSION_REQUIRED CSR_VERSION(1, 23) +#define SKL_CSR_VERSION_REQUIRED CSR_VERSION(1, 26) -#define I915_CSR_BXT "i915/bxt_dmc_ver1.bin" +#define I915_CSR_BXT "i915/bxt_dmc_ver1_07.bin" MODULE_FIRMWARE(I915_CSR_BXT); #define BXT_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) -- cgit v0.10.2 From e9e5e3fae8da7e237049e00e0bfc9e32fd808fe8 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 22 Aug 2016 12:47:43 +0200 Subject: bdev: fix NULL pointer dereference I got this: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) CPU: 0 PID: 5505 Comm: syz-executor Not tainted 4.8.0-rc2+ #161 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 task: ffff880113415940 task.stack: ffff880118350000 RIP: 0010:[] [] bd_mount+0x52/0xa0 RSP: 0018:ffff880118357ca0 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: ffffffffffffffff RCX: ffffc90000bb6000 RDX: 0000000000000018 RSI: ffffffff846d6b20 RDI: 00000000000000c7 RBP: ffff880118357cb0 R08: ffff880115967c68 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801188211e8 R13: ffffffff847baa20 R14: ffff8801139cb000 R15: 0000000000000080 FS: 00007fa3ff6c0700(0000) GS:ffff88011aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc1d8cc7e78 CR3: 0000000109f20000 CR4: 00000000000006f0 DR0: 000000000000001e DR1: 000000000000001e DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 Stack: ffff880112cfd6c0 ffff8801188211e8 ffff880118357cf0 ffffffff8167f207 ffffffff816d7a1e ffff880112a413c0 ffffffff847baa20 ffff8801188211e8 0000000000000080 ffff880112cfd6c0 ffff880118357d38 ffffffff816dce0a Call Trace: [] mount_fs+0x97/0x2e0 [] ? alloc_vfsmnt+0x55e/0x760 [] vfs_kern_mount+0x7a/0x300 [] ? _raw_read_unlock+0x2c/0x50 [] do_mount+0x3d7/0x2730 [] ? trace_do_page_fault+0x1f4/0x3a0 [] ? copy_mount_string+0x40/0x40 [] ? memset+0x31/0x40 [] ? copy_mount_options+0x1ee/0x320 [] SyS_mount+0xb2/0x120 [] ? copy_mnt_ns+0x970/0x970 [] do_syscall_64+0x1c4/0x4e0 [] entry_SYSCALL64_slow_path+0x25/0x25 Code: 83 e8 63 1b fc ff 48 85 c0 48 89 c3 74 4c e8 56 35 d1 ff 48 8d bb c8 00 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00 75 36 4c 8b a3 c8 00 00 00 48 b8 00 00 00 00 00 fc RIP [] bd_mount+0x52/0xa0 RSP ---[ end trace 13690ad962168b98 ]--- mount_pseudo() returns ERR_PTR(), not NULL, on error. Fixes: 3684aa7099e0 ("block-dev: enable writeback cgroup support") Cc: Shaohua Li Cc: Tejun Heo Cc: Jens Axboe Cc: stable@vger.kernel.org Signed-off-by: Vegard Nossum Signed-off-by: Jens Axboe diff --git a/fs/block_dev.c b/fs/block_dev.c index c3cdde8..e17bdbd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -646,7 +646,7 @@ static struct dentry *bd_mount(struct file_system_type *fs_type, { struct dentry *dent; dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC); - if (dent) + if (!IS_ERR(dent)) dent->d_sb->s_iflags |= SB_I_CGROUPWB; return dent; } -- cgit v0.10.2 From 209c721ce27022656a9d792b89572c4fdd7d31a7 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 21 Aug 2016 15:37:16 +0000 Subject: ASoC: max98371: Add terminate entry for i2c_device_id tables Make sure i2c_device_id tables are NULL terminated. Signed-off-by: Wei Yongjun Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/max98371.c b/sound/soc/codecs/max98371.c index cf0a39b..02352ed 100644 --- a/sound/soc/codecs/max98371.c +++ b/sound/soc/codecs/max98371.c @@ -412,6 +412,7 @@ static int max98371_i2c_remove(struct i2c_client *client) static const struct i2c_device_id max98371_i2c_id[] = { { "max98371", 0 }, + { } }; MODULE_DEVICE_TABLE(i2c, max98371_i2c_id); -- cgit v0.10.2 From 6a33fa2b87513fee44cb8f0cd17b1acd6316bc6b Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 19 Aug 2016 18:07:14 +0100 Subject: irqchip/mips-gic: Cleanup chip and handler setup gic_shared_irq_domain_map() is called from gic_irq_domain_alloc() where the wrong chip has been set, and is then overwritten. Tidy this up by setting the correct chip the first time, and setting the handle_level_irq handler from gic_irq_domain_alloc() too. gic_shared_irq_domain_map() is also called from gic_irq_domain_map(), which now calls irq_set_chip_and_handler() to retain its previous behaviour. This patch prepares for a follow-on which will call gic_shared_irq_domain_map() from a callback where the lock on the struct irq_desc is held, which without this change would cause the call to irq_set_chip_and_handler() to lead to a deadlock. Fixes: c98c1822ee13 ("irqchip/mips-gic: Add device hierarchy domain") Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: Jason Cooper Cc: Marc Zyngier Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160819170715.27820-1-paul.burton@imgtec.com Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index c5f33c3..2e0f499 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -713,9 +713,6 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq, unsigned long flags; int i; - irq_set_chip_and_handler(virq, &gic_level_irq_controller, - handle_level_irq); - spin_lock_irqsave(&gic_lock, flags); gic_map_to_pin(intr, gic_cpu_pin); gic_map_to_vpe(intr, mips_cm_vp_id(vpe)); @@ -732,6 +729,10 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq, { if (GIC_HWIRQ_TO_LOCAL(hw) < GIC_NUM_LOCAL_INTRS) return gic_local_irq_domain_map(d, virq, hw); + + irq_set_chip_and_handler(virq, &gic_level_irq_controller, + handle_level_irq); + return gic_shared_irq_domain_map(d, virq, hw, 0); } @@ -771,11 +772,13 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq, hwirq = GIC_SHARED_TO_HWIRQ(base_hwirq + i); ret = irq_domain_set_hwirq_and_chip(d, virq + i, hwirq, - &gic_edge_irq_controller, + &gic_level_irq_controller, NULL); if (ret) goto error; + irq_set_handler(virq + i, handle_level_irq); + ret = gic_shared_irq_domain_map(d, virq + i, hwirq, cpu); if (ret) goto error; -- cgit v0.10.2 From 2564970a381651865364974ea414384b569cb9c0 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 19 Aug 2016 18:07:15 +0100 Subject: irqchip/mips-gic: Implement activate op for device domain If an IRQ is setup using __setup_irq(), which is used by the request_irq() family of functions, and we are using an SMP kernel then the affinity of the IRQ will be set via setup_affinity() immediately after the IRQ is enabled. This call to gic_set_affinity() will lead to the interrupt being mapped to a VPE. However there are other ways to use IRQs which don't cause affinity to be set, for example if it is used to chain to another IRQ controller with irq_set_chained_handler_and_data(). The irq_set_chained_handler_and_data() code path will enable the IRQ, but will not trigger a call to gic_set_affinity() and in this case nothing will map the interrupt to a VPE, meaning that the interrupt is never received. Fix this by implementing the activate operation for the GIC device IRQ domain, using gic_shared_irq_domain_map() to map the interrupt to the correct pin of cpu 0. Fixes: c98c1822ee13 ("irqchip/mips-gic: Add device hierarchy domain") Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: Jason Cooper Cc: Marc Zyngier Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160819170715.27820-2-paul.burton@imgtec.com Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 2e0f499..83f4983 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -893,10 +893,17 @@ void gic_dev_domain_free(struct irq_domain *d, unsigned int virq, return; } +static void gic_dev_domain_activate(struct irq_domain *domain, + struct irq_data *d) +{ + gic_shared_irq_domain_map(domain, d->irq, d->hwirq, 0); +} + static struct irq_domain_ops gic_dev_domain_ops = { .xlate = gic_dev_domain_xlate, .alloc = gic_dev_domain_alloc, .free = gic_dev_domain_free, + .activate = gic_dev_domain_activate, }; static int gic_ipi_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, -- cgit v0.10.2 From e6a00f6684c9f348cd782922b4b277c68ab90b63 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Wed, 27 Jul 2016 01:24:52 -0700 Subject: IB/mlx4: Make function use_tunnel_data return void No need to return int if function always returns 0 Signed-off-by: Yuval Shaia Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index d6fc8a6..15b6289 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -576,8 +576,8 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) checksum == cpu_to_be16(0xffff); } -static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, - unsigned tail, struct mlx4_cqe *cqe, int is_eth) +static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, + unsigned tail, struct mlx4_cqe *cqe, int is_eth) { struct mlx4_ib_proxy_sqp_hdr *hdr; @@ -600,8 +600,6 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); } - - return 0; } static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries, @@ -852,9 +850,11 @@ repoll: if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) { if ((*cur_qp)->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | - MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) - return use_tunnel_data(*cur_qp, cq, wc, tail, - cqe, is_eth); + MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { + use_tunnel_data(*cur_qp, cq, wc, tail, cqe, + is_eth); + return 0; + } } wc->slid = be16_to_cpu(cqe->rlid); -- cgit v0.10.2 From 5412352fcd8fba7f278ae8c9ba36296716f17ae8 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Wed, 27 Jul 2016 01:24:53 -0700 Subject: IB/mlx4: Return EAGAIN for any error in mlx4_ib_poll_one Error code EAGAIN should be used when errors are temporary and next call might succeeds. When error code other than EAGAIN is returned, the caller (mlx4_ib_poll) will assume all CQE in the same bunch are error too and will drop them all. Signed-off-by: Yuval Shaia Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 15b6289..006db64 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -690,7 +690,7 @@ repoll: if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP && is_send)) { pr_warn("Completion for NOP opcode detected!\n"); - return -EINVAL; + return -EAGAIN; } /* Resize CQ in progress */ @@ -721,7 +721,7 @@ repoll: if (unlikely(!mqp)) { pr_warn("CQ %06x with entry for unknown QPN %06x\n", cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK); - return -EINVAL; + return -EAGAIN; } *cur_qp = to_mibqp(mqp); @@ -739,7 +739,7 @@ repoll: if (unlikely(!msrq)) { pr_warn("CQ %06x with entry for unknown SRQN %06x\n", cq->mcq.cqn, srq_num); - return -EINVAL; + return -EAGAIN; } } -- cgit v0.10.2 From 8303f683b161467b6595c153c8751b80f9df3508 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Wed, 3 Aug 2016 20:19:32 -0400 Subject: IB/hfi1: Allocate cpu mask on the heap to silence warning If CONFIG_FRAME_WARN is small (1K) and CONFIG_NR_CPUS big then a frame size warning is triggered during build. Allocate the cpu mask dynamically to silence the warning. Reviewed-by: Sebastian Sanchez Signed-off-by: Tadeusz Struk Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 79575ee..9bbb214 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -682,7 +682,7 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, size_t count) { struct hfi1_affinity_node *entry; - struct cpumask mask; + cpumask_var_t mask; int ret, i; spin_lock(&node_affinity.lock); @@ -692,19 +692,24 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, if (!entry) return -EINVAL; - ret = cpulist_parse(buf, &mask); + ret = zalloc_cpumask_var(&mask, GFP_KERNEL); + if (!ret) + return -ENOMEM; + + ret = cpulist_parse(buf, mask); if (ret) - return ret; + goto out; - if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) { + if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) { dd_dev_warn(dd, "Invalid CPU mask\n"); - return -EINVAL; + ret = -EINVAL; + goto out; } mutex_lock(&sdma_affinity_mutex); /* reset the SDMA interrupt affinity details */ init_cpu_mask_set(&entry->def_intr); - cpumask_copy(&entry->def_intr.mask, &mask); + cpumask_copy(&entry->def_intr.mask, mask); /* * Reassign the affinity for each SDMA interrupt. */ @@ -720,8 +725,9 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, if (ret) break; } - mutex_unlock(&sdma_affinity_mutex); +out: + free_cpumask_var(mask); return ret ? ret : strnlen(buf, PAGE_SIZE); } -- cgit v0.10.2 From 23d70503ee187819a3775c7ac73f17c5bfe3fad0 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 5 Aug 2016 13:46:49 +0000 Subject: IB/core: Fix possible memory leak in cma_resolve_iboe_route() 'work' and 'route->path_rec' are malloced in cma_resolve_iboe_route() and should be freed before leaving from the error handling cases, otherwise it will cause memory leak. Fixes: 200298326b27 ('IB/core: Validate route when we init ah') Signed-off-by: Wei Yongjun Reviewed-by: Haggai Eran Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e6dfa1b..5f65a78 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2462,18 +2462,24 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) if (addr->dev_addr.bound_dev_if) { ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); - if (!ndev) - return -ENODEV; + if (!ndev) { + ret = -ENODEV; + goto err2; + } if (ndev->flags & IFF_LOOPBACK) { dev_put(ndev); - if (!id_priv->id.device->get_netdev) - return -EOPNOTSUPP; + if (!id_priv->id.device->get_netdev) { + ret = -EOPNOTSUPP; + goto err2; + } ndev = id_priv->id.device->get_netdev(id_priv->id.device, id_priv->id.port_num); - if (!ndev) - return -ENODEV; + if (!ndev) { + ret = -ENODEV; + goto err2; + } } route->path_rec->net = &init_net; -- cgit v0.10.2 From 1d5840c971455ad4ecece3f72012961cac8d0f00 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 7 Aug 2016 12:20:38 +0000 Subject: IB/isert: fix error return code in isert_alloc_login_buf() Fix to return error code -ENOMEM from the alloc error handling case instead of 0, as done elsewhere in this function. Signed-off-by: Wei Yongjun Acked-by: Sagi Grimberg Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index ba6be06..7914c14 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -448,7 +448,7 @@ isert_alloc_login_buf(struct isert_conn *isert_conn, isert_conn->login_rsp_buf = kzalloc(ISER_RX_PAYLOAD_SIZE, GFP_KERNEL); if (!isert_conn->login_rsp_buf) { - isert_err("Unable to allocate isert_conn->login_rspbuf\n"); + ret = -ENOMEM; goto out_unmap_login_req_buf; } -- cgit v0.10.2 From abb658ef0529488cfa75e79a2a9f894cb95eaec3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 8 Aug 2016 09:49:47 +0000 Subject: IB/hfi1: Remove duplicated include from affinity.c Remove duplicated include. Signed-off-by: Wei Yongjun Reviewed-by: Leon Romanovsky Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 9bbb214..0566393 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -47,7 +47,6 @@ #include #include #include -#include #include "hfi.h" #include "affinity.h" -- cgit v0.10.2 From c62fb260a86dde3df5b2905432caa0e9f6898434 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 12 Aug 2016 11:17:37 -0400 Subject: IB/hfi1,IB/qib: Fix qp_stats sleep with rcu read lock held The qp init function does a kzalloc() while holding the RCU lock that encounters the following warning with a debug kernel when a cat of the qp_stats is done: [ 231.723948] rcu_scheduler_active = 1, debug_locks = 0 [ 231.731939] 3 locks held by cat/11355: [ 231.736492] #0: (debugfs_srcu){......}, at: [] debugfs_use_file_start+0x5/0x90 [ 231.746955] #1: (&p->lock){+.+.+.}, at: [] seq_read+0x4c/0x3c0 [ 231.755873] #2: (rcu_read_lock){......}, at: [] _qp_stats_seq_start+0x5/0xd0 [hfi1] [ 231.766862] The init functions do an implicit next which requires the rcu read lock before the kzalloc(). Fix for both drivers is to change the scope of the init function to only do the allocation and the initialization of the just allocated iter. The implict next is moved back into the respective start functions to fix the issue. Signed-off-by: Ira Weiny Signed-off-by: Mike Marciniszyn CC: # 4.6.x- Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index dbab9d9..a49cc88 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -223,28 +223,32 @@ DEBUGFS_SEQ_FILE_OPEN(ctx_stats) DEBUGFS_FILE_OPS(ctx_stats); static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) + __acquires(RCU) { struct qp_iter *iter; loff_t n = *pos; - rcu_read_lock(); iter = qp_iter_init(s->private); + + /* stop calls rcu_read_unlock */ + rcu_read_lock(); + if (!iter) return NULL; - while (n--) { + do { if (qp_iter_next(iter)) { kfree(iter); return NULL; } - } + } while (n--); return iter; } static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, loff_t *pos) + __must_hold(RCU) { struct qp_iter *iter = iter_ptr; @@ -259,7 +263,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, } static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) -__releases(RCU) + __releases(RCU) { rcu_read_unlock(); } diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index a5aa351..4e4d831 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -656,10 +656,6 @@ struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev) iter->dev = dev; iter->specials = dev->rdi.ibdev.phys_port_cnt * 2; - if (qp_iter_next(iter)) { - kfree(iter); - return NULL; - } return iter; } diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c index 5e75b43..5bad8e3 100644 --- a/drivers/infiniband/hw/qib/qib_debugfs.c +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -189,27 +189,32 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v) DEBUGFS_FILE(ctx_stats) static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) + __acquires(RCU) { struct qib_qp_iter *iter; loff_t n = *pos; - rcu_read_lock(); iter = qib_qp_iter_init(s->private); + + /* stop calls rcu_read_unlock */ + rcu_read_lock(); + if (!iter) return NULL; - while (n--) { + do { if (qib_qp_iter_next(iter)) { kfree(iter); return NULL; } - } + } while (n--); return iter; } static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, loff_t *pos) + __must_hold(RCU) { struct qib_qp_iter *iter = iter_ptr; @@ -224,6 +229,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, } static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) + __releases(RCU) { rcu_read_unlock(); } diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 9cc0aae..f9b8cd2 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -573,10 +573,6 @@ struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev) return NULL; iter->dev = dev; - if (qib_qp_iter_next(iter)) { - kfree(iter); - return NULL; - } return iter; } -- cgit v0.10.2 From 140690eae7626a820ea135f33ff9fe11c1bcee6d Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 9 Aug 2016 11:17:18 -0400 Subject: IB/hfi1: Fetch monitor values on-demand for CableInfo query The monitor values from bytes 22 through 81 of the QSFP memory space (SFF 8636) are dynamic and serving them out of the QSFP memory cache maintained by the driver provides stale data to the CableInfo SMA query. This patch refreshes the dynamic values from the QSFP memory on request and overwrites the stale data from the cache for the overlap between the requested range and the monitor range. Reviewed-by: Jubin John Reviewed-by: Ira Weiny Signed-off-by: Easwar Hariharan Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index a207717..4e95ad8 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -706,8 +706,8 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len, u8 *data) { struct hfi1_pportdata *ppd; - u32 excess_len = 0; - int ret = 0; + u32 excess_len = len; + int ret = 0, offset = 0; if (port_num > dd->num_pports || port_num < 1) { dd_dev_info(dd, "%s: Invalid port number %d\n", @@ -740,6 +740,34 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len, } memcpy(data, &ppd->qsfp_info.cache[addr], len); + + if (addr <= QSFP_MONITOR_VAL_END && + (addr + len) >= QSFP_MONITOR_VAL_START) { + /* Overlap with the dynamic channel monitor range */ + if (addr < QSFP_MONITOR_VAL_START) { + if (addr + len <= QSFP_MONITOR_VAL_END) + len = addr + len - QSFP_MONITOR_VAL_START; + else + len = QSFP_MONITOR_RANGE; + offset = QSFP_MONITOR_VAL_START - addr; + addr = QSFP_MONITOR_VAL_START; + } else if (addr == QSFP_MONITOR_VAL_START) { + offset = 0; + if (addr + len > QSFP_MONITOR_VAL_END) + len = QSFP_MONITOR_RANGE; + } else { + offset = 0; + if (addr + len > QSFP_MONITOR_VAL_END) + len = QSFP_MONITOR_VAL_END - addr + 1; + } + /* Refresh the values of the dynamic monitors from the cable */ + ret = one_qsfp_read(ppd, dd->hfi1_id, addr, data + offset, len); + if (ret != len) { + ret = -EAGAIN; + goto set_zeroes; + } + } + return 0; set_zeroes: diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h index 69275eb..36cf523 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.h +++ b/drivers/infiniband/hw/hfi1/qsfp.h @@ -74,6 +74,9 @@ /* Defined fields that Intel requires of qualified cables */ /* Byte 0 is Identifier, not checked */ /* Byte 1 is reserved "status MSB" */ +#define QSFP_MONITOR_VAL_START 22 +#define QSFP_MONITOR_VAL_END 81 +#define QSFP_MONITOR_RANGE (QSFP_MONITOR_VAL_END - QSFP_MONITOR_VAL_START + 1) #define QSFP_TX_CTRL_BYTE_OFFS 86 #define QSFP_PWR_CTRL_BYTE_OFFS 93 #define QSFP_CDR_CTRL_BYTE_OFFS 98 -- cgit v0.10.2 From c867caaf8ecd5498d399558d0f11825f175c6cdd Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 9 Aug 2016 11:19:55 -0400 Subject: IB/hfi1: Pass packet ptr to set_armed_active The "packet" parameter was being passed on the stack, change it to a pointer. Reviewed-by: Don Hiatt Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 8246dc7..c415687 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -888,14 +888,14 @@ void set_all_slowpath(struct hfi1_devdata *dd) } static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, - struct hfi1_packet packet, + struct hfi1_packet *packet, struct hfi1_devdata *dd) { struct work_struct *lsaw = &rcd->ppd->linkstate_active_work; - struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd, - packet.rhf_addr); + struct hfi1_message_header *hdr = hfi1_get_msgheader(packet->rcd->dd, + packet->rhf_addr); - if (hdr2sc(hdr, packet.rhf) != 0xf) { + if (hdr2sc(hdr, packet->rhf) != 0xf) { int hwstate = read_logical_state(dd); if (hwstate != LSTATE_ACTIVE) { @@ -979,7 +979,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) /* Auto activate link on non-SC15 packet receive */ if (unlikely(rcd->ppd->host_link_state == HLS_UP_ARMED) && - set_armed_to_active(rcd, packet, dd)) + set_armed_to_active(rcd, &packet, dd)) goto bail; last = process_rcv_packet(&packet, thread); } -- cgit v0.10.2 From 69b9f4a4233b6f9ea168c25123b9eb629739d8e5 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 9 Aug 2016 11:19:56 -0400 Subject: IB/hfi1: Validate header in set_armed_active Validate the etype to insure that the header is correct. Reviewed-by: Don Hiatt Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index c415687..303f105 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -894,8 +894,9 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, struct work_struct *lsaw = &rcd->ppd->linkstate_active_work; struct hfi1_message_header *hdr = hfi1_get_msgheader(packet->rcd->dd, packet->rhf_addr); + u8 etype = rhf_rcv_type(packet->rhf); - if (hdr2sc(hdr, packet->rhf) != 0xf) { + if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) { int hwstate = read_logical_state(dd); if (hwstate != LSTATE_ACTIVE) { -- cgit v0.10.2 From 476d95bd02240894806ebe64f1c4dcf6dbba87f4 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 10 Aug 2016 03:14:04 +0000 Subject: IB/hfi1: Using kfree_rcu() to simplify the code The callback function of call_rcu() just calls a kfree(), so we can use kfree_rcu() instead of call_rcu() + callback function. Signed-off-by: Wei Yongjun Tested-by: Mike Marciniszyn Acked-by: Mike Marciniszyn Tested-by: Mike Marciniszyn Acked-by: Mike Marciniszyn Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 1000e0f..f41414e 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1656,7 +1656,6 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) struct hfi1_devdata *hfi1_init_dd(struct pci_dev *, const struct pci_device_id *); void hfi1_free_devdata(struct hfi1_devdata *); -void cc_state_reclaim(struct rcu_head *rcu); struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra); /* LED beaconing functions */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index a358d23..b793545 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1333,7 +1333,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) spin_unlock(&ppd->cc_state_lock); if (cc_state) - call_rcu(&cc_state->rcu, cc_state_reclaim); + kfree_rcu(cc_state, rcu); } free_credit_return(dd); diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 1263abe..95c43e1 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -3398,7 +3398,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd) spin_unlock(&ppd->cc_state_lock); - call_rcu(&old_cc_state->rcu, cc_state_reclaim); + kfree_rcu(old_cc_state, rcu); } static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, @@ -3553,13 +3553,6 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } -void cc_state_reclaim(struct rcu_head *rcu) -{ - struct cc_state *cc_state = container_of(rcu, struct cc_state, rcu); - - kfree(cc_state); -} - static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, u32 *resp_len) -- cgit v0.10.2 From 57bb562ad4651453c9f20d506a65e46e4d11042f Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Wed, 10 Aug 2016 07:34:27 +0200 Subject: IB/hfi1: Add missing error code assignment before test It is likely that checking the result of 'setup_ctxt' is expected here. Signed-off-by: Christophe JAILLET Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 1ecbec1..ed76be3 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -222,7 +222,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, ret = assign_ctxt(fp, &uinfo); if (ret < 0) return ret; - setup_ctxt(fp); + ret = setup_ctxt(fp); if (ret) return ret; ret = user_init(fp); -- cgit v0.10.2 From 86cd747c6dd6cdbc825e36ad5f0029f3c5a37776 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Wed, 10 Aug 2016 17:45:01 +0200 Subject: IB/usnic: Fix error return code If 'pci_register_driver' fails, we return 'err' which is known to be 0. Return the error instead. Signed-off-by: Christophe JAILLET Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index c229b9f..0a89a95 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -664,7 +664,8 @@ static int __init usnic_ib_init(void) return err; } - if (pci_register_driver(&usnic_ib_pci_driver)) { + err = pci_register_driver(&usnic_ib_pci_driver); + if (err) { usnic_err("Unable to register with PCI\n"); goto out_umem_fini; } -- cgit v0.10.2 From 701b4bf6e3eeff1a856d6889e1ebb35edd6c019a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 16 Aug 2016 12:29:46 +0300 Subject: MAINTAINERS: Fix Soft RoCE location The Soft RoCE (rxe) is located in drivers/inifiniband/sw and not in drivers/infiniband/hw/. This patch fixes it. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/MAINTAINERS b/MAINTAINERS index 0bbe4b1..fc2613d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7661,7 +7661,7 @@ L: linux-rdma@vger.kernel.org S: Supported W: https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home Q: http://patchwork.kernel.org/project/linux-rdma/list/ -F: drivers/infiniband/hw/rxe/ +F: drivers/infiniband/sw/rxe/ F: include/uapi/rdma/rdma_user_rxe.h MEMBARRIER SUPPORT -- cgit v0.10.2 From f29a08dc145e05de6a57f0aeaba6020464f80e15 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 16 Aug 2016 13:25:34 -0700 Subject: IB/hfi1: Return invalid field for non-QSFP CableInfo queries The driver does not check if the CableInfo query is supported for the port type. Return early if CableInfo is not supported for the port type, making compliance with the specification explicit and preventing lower level code from potentially doing the wrong thing if the query is not supported for the hardware implementation. Reviewed-by: Ira Weiny Signed-off-by: Easwar Hariharan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 95c43e1..39e42c3 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1819,6 +1819,11 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, u32 len = OPA_AM_CI_LEN(am) + 1; int ret; + if (dd->pport->port_type != PORT_TYPE_QSFP) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */ #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1) #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK) -- cgit v0.10.2 From 08fe16f6192bccd5798e9b60461f7aa151b34cd4 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Tue, 16 Aug 2016 13:26:12 -0700 Subject: IB/hfi1: Improve J_KEY generation Previously, J_KEY generation was based on the lower 16 bits of the user's UID. While this works, it was not good enough as a non-root user could collide with a root user given a sufficiently large UID. This patch attempt to improve the J_KEY generation by using the following algorithm: The 16 bit J_KEY space is partitioned into 3 separate spaces reserved for different user classes: * all users with administtor privileges (including 'root') will use J_KEYs in the range of 0 to 31, * all kernel protocols, which use KDETH packets will use J_KEYs in the range of 32 to 63, and * all other users will use J_KEYs in the range of 64 to 65535. The above separation is aimed at preventing different user levels from sending packets to each other and, additionally, separate kernel protocols from all other types of users. The later is meant to prevent the potential corruption of kernel memory by any other type of user. Reviewed-by: Ira Weiny Signed-off-by: Mitko Haralanov Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index f41414e..a021e66 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1272,9 +1272,26 @@ static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf) ((!!(rhf_dc_info(rhf))) << 4); } +#define HFI1_JKEY_WIDTH 16 +#define HFI1_JKEY_MASK (BIT(16) - 1) +#define HFI1_ADMIN_JKEY_RANGE 32 + +/* + * J_KEYs are split and allocated in the following groups: + * 0 - 31 - users with administrator privileges + * 32 - 63 - kernel protocols using KDETH packets + * 64 - 65535 - all other users using KDETH packets + */ static inline u16 generate_jkey(kuid_t uid) { - return from_kuid(current_user_ns(), uid) & 0xffff; + u16 jkey = from_kuid(current_user_ns(), uid) & HFI1_JKEY_MASK; + + if (capable(CAP_SYS_ADMIN)) + jkey &= HFI1_ADMIN_JKEY_RANGE - 1; + else if (jkey < 64) + jkey |= BIT(HFI1_JKEY_WIDTH - 1); + + return jkey; } /* -- cgit v0.10.2 From 56c8ca510de2884b2f741e5fd8d3df6161378de6 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 16 Aug 2016 13:26:29 -0700 Subject: IB/rdmvat: Fix double vfree() in rvt_create_qp() error path The unwind logic for creating a user QP has a double vfree of the non-shared receive queue when handling a "too many qps" failure. The code unwinds the mmmap info by decrementing a reference count which will call rvt_release_mmap_info() which in turn does the vfree() of the r_rq.wq. The unwind code then does the same free. Fix by guarding the vfree() with the same test that is done in close and only do the vfree() if qp->ip is NULL. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index bdb540f..870b4f2 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -873,7 +873,8 @@ bail_qpn: free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: - vfree(qp->r_rq.wq); + if (!qp->ip) + vfree(qp->r_rq.wq); bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); -- cgit v0.10.2 From e0cf75deab8155334c8228eb7f097b15127d0a49 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 16 Aug 2016 13:27:03 -0700 Subject: IB/hfi1: Fix mm_struct use after free Testing with CONFIG_SLUB_DEBUG_ON=y resulted in the kernel panic below. This is the result of the mm_struct sometimes being free'd prior to hfi1_file_close being called. This was due to the combination of 2 reasons: 1) hfi1_file_close is deferred in process exit and it therefore may not be called synchronously with process exit. 2) exit_mm is called prior to exit_files in do_exit. Normally this is ok however, our kernel bypass code requires us to have access to the mm_struct for house keeping both at "normal" close time as well as at process exit. Therefore, the fix is to simply keep a reference to the mm_struct until we are done with it. [ 3006.340150] general protection fault: 0000 [#1] SMP [ 3006.346469] Modules linked in: hfi1 rdmavt rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm dm_mirror dm_region_hash dm_log dm_mod snd_hda_code c_realtek iTCO_wdt snd_hda_codec_generic iTCO_vendor_support sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm irqbypass c rct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel lrw snd_hda_intel gf128mul snd_hda_codec glue_helper snd_hda_core ablk_helper sn d_hwdep cryptd snd_seq snd_seq_device snd_pcm snd_timer snd soundcore pcspkr shpchp mei_me sg lpc_ich mei i2c_i801 mfd_core ioatdma ipmi_devi ntf wmi ipmi_si ipmi_msghandler acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables ext4 jbd2 mbcache mlx4_en ib_core sr_mod s d_mod cdrom crc32c_intel mgag200 drm_kms_helper syscopyarea sysfillrect igb sysimgblt fb_sys_fops ptp mlx4_core ttm isci pps_core ahci drm li bsas libahci dca firewire_ohci i2c_algo_bit scsi_transport_sas firewire_core crc_itu_t i2c_core libata [last unloaded: mlx4_ib] [ 3006.461759] CPU: 16 PID: 11624 Comm: mpi_stress Not tainted 4.7.0-rc5+ #1 [ 3006.469915] Hardware name: Intel Corporation W2600CR ........../W2600CR, BIOS SE5C600.86B.01.08.0003.022620131521 02/26/2013 [ 3006.483027] task: ffff8804102f0040 ti: ffff8804102f8000 task.ti: ffff8804102f8000 [ 3006.491971] RIP: 0010:[] [] __lock_acquire+0xb3/0x19e0 [ 3006.501905] RSP: 0018:ffff8804102fb908 EFLAGS: 00010002 [ 3006.508447] RAX: 6b6b6b6b6b6b6b6b RBX: 0000000000000001 RCX: 0000000000000000 [ 3006.517012] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff880410b56a40 [ 3006.525569] RBP: ffff8804102fb9b0 R08: 0000000000000001 R09: 0000000000000000 [ 3006.534119] R10: ffff8804102f0040 R11: 0000000000000000 R12: 0000000000000000 [ 3006.542664] R13: ffff880410b56a40 R14: 0000000000000000 R15: 0000000000000000 [ 3006.551203] FS: 00007ff478c08700(0000) GS:ffff88042e200000(0000) knlGS:0000000000000000 [ 3006.560814] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3006.567806] CR2: 00007f667f5109e0 CR3: 0000000001c06000 CR4: 00000000000406e0 [ 3006.576352] Stack: [ 3006.579157] ffffffff8124b819 ffffffffffffffff 0000000000000000 ffff8804102fb940 [ 3006.588072] 0000000000000002 0000000000000000 ffff8804102f0040 0000000000000007 [ 3006.596971] 0000000000000006 ffff8803cad6f000 0000000000000000 ffff8804102f0040 [ 3006.605878] Call Trace: [ 3006.609220] [] ? uncharge_batch+0x109/0x250 [ 3006.616382] [] lock_acquire+0xd3/0x220 [ 3006.623056] [] ? hfi1_release_user_pages+0x7c/0xa0 [hfi1] [ 3006.631593] [] down_write+0x49/0x80 [ 3006.638022] [] ? hfi1_release_user_pages+0x7c/0xa0 [hfi1] [ 3006.646569] [] hfi1_release_user_pages+0x7c/0xa0 [hfi1] [ 3006.654898] [] cacheless_tid_rb_remove+0x106/0x330 [hfi1] [ 3006.663417] [] ? mark_held_locks+0x66/0x90 [ 3006.670498] [] ? _raw_spin_unlock_irqrestore+0x36/0x60 [ 3006.678741] [] tid_rb_remove+0xe/0x10 [hfi1] [ 3006.686010] [] hfi1_mmu_rb_unregister+0xc5/0x100 [hfi1] [ 3006.694387] [] hfi1_user_exp_rcv_free+0x39/0x120 [hfi1] [ 3006.702732] [] hfi1_file_close+0x17a/0x330 [hfi1] [ 3006.710489] [] __fput+0xfa/0x230 [ 3006.716595] [] ____fput+0xe/0x10 [ 3006.722696] [] task_work_run+0x86/0xc0 [ 3006.729379] [] do_exit+0x323/0xc40 [ 3006.735672] [] do_group_exit+0x4c/0xc0 [ 3006.742371] [] get_signal+0x345/0x940 [ 3006.748958] [] do_signal+0x37/0x700 [ 3006.755328] [] ? poll_select_set_timeout+0x5a/0x90 [ 3006.763146] [] ? __audit_syscall_exit+0x1db/0x260 [ 3006.770853] [] ? rcu_read_lock_sched_held+0x93/0xa0 [ 3006.778765] [] ? kfree+0x1e4/0x2a0 [ 3006.784986] [] ? exit_to_usermode_loop+0x33/0xac [ 3006.792551] [] exit_to_usermode_loop+0x5e/0xac [ 3006.799907] [] do_syscall_64+0x12a/0x190 [ 3006.806664] [] entry_SYSCALL64_slow_path+0x25/0x25 [ 3006.814396] Code: 24 08 44 89 44 24 10 89 4c 24 18 e8 a8 d8 ff ff 48 85 c0 8b 4c 24 18 44 8b 44 24 10 44 8b 4c 24 08 4c 8b 14 24 0f 84 30 08 00 00 ff 80 98 01 00 00 8b 3d 48 ad be 01 45 8b a2 90 0b 00 00 85 [ 3006.837158] RIP [] __lock_acquire+0xb3/0x19e0 [ 3006.844401] RSP [ 3006.851170] ---[ end trace b7b9f21cf06c27df ]--- [ 3006.927420] Kernel panic - not syncing: Fatal exception [ 3006.933954] Kernel Offset: disabled [ 3006.940961] ---[ end Kernel panic - not syncing: Fatal exception [ 3006.948249] ------------[ cut here ]------------ Fixes: 3faa3d9a308e ("IB/hfi1: Make use of mm consistent") Reviewed-by: Dean Luick Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index ed76be3..7e03ccd 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -183,6 +183,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) if (fd) { fd->rec_cpu_num = -1; /* no cpu affinity by default */ fd->mm = current->mm; + atomic_inc(&fd->mm->mm_count); } fp->private_data = fd; @@ -779,6 +780,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) mutex_unlock(&hfi1_mutex); hfi1_free_ctxtdata(dd, uctxt); done: + mmdrop(fdata->mm); kobject_put(&dd->kobj); kfree(fdata); return 0; -- cgit v0.10.2 From 7f446abf12d741f4e29f00c633cb5fa67c2eee71 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 19 Aug 2016 07:29:07 -0700 Subject: iw_cxgb4: limit IRD/ORD advertised to ULP by device max. The i40iw initiator sends an MPA-request with ird = 63, ord = 63. The cxgb4 responder sends a RST. Since the inbound ord=63 and it exceeds the max_ird/c4iw_max_read_depth (=32 default), chelsio decides to abort. Instead, cxgb4 should adjust the ord/ird down before presenting it to the ULP. Reported-by: Shiraz Saleem Signed-off-by: Steve Wise Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 3aca7f6..a698efc 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1827,8 +1827,12 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) (ep->mpa_pkt + sizeof(*mpa)); ep->ird = ntohs(mpa_v2_params->ird) & MPA_V2_IRD_ORD_MASK; + ep->ird = min_t(u32, ep->ird, + cur_max_read_depth(ep->com.dev)); ep->ord = ntohs(mpa_v2_params->ord) & MPA_V2_IRD_ORD_MASK; + ep->ord = min_t(u32, ep->ord, + cur_max_read_depth(ep->com.dev)); PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird, ep->ord); if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) -- cgit v0.10.2 From 30b03b1528b60623986ee0b50ec0e5dab9094be8 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 19 Aug 2016 07:29:08 -0700 Subject: iw_cxgb4: use the MPA initiator's IRD if < our ORD The i40iw initiator sends an MPA-request with ird=16 and ord=16. The cxgb4 responder sends an MPA-reply with ord = 32 causing i40iw to terminate due to insufficient resources. The logic to reduce the ORD to <= peer's IRD was wrong. Reported-by: Shiraz Saleem Signed-off-by: Steve Wise Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a698efc..b6a953a 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3140,7 +3140,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { if (conn_param->ord > ep->ird) { if (RELAXED_IRD_NEGOTIATION) { - ep->ord = ep->ird; + conn_param->ord = ep->ird; } else { ep->ird = conn_param->ird; ep->ord = conn_param->ord; -- cgit v0.10.2 From 5cab4d84780573afbf5077ae9c3f919b4f305f20 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 22 Aug 2016 13:49:59 -0700 Subject: Input: silead - use devm_gpiod_get The silead code is using devm_foo for everything (and does not free any resources). Except that it is using gpiod_get instead of devm_gpiod_get (but is not freeing the gpio_desc), change this to use devm_gpiod_get so that the gpio will be properly released. Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c index 7379fe1..b2744a6 100644 --- a/drivers/input/touchscreen/silead.c +++ b/drivers/input/touchscreen/silead.c @@ -464,7 +464,7 @@ static int silead_ts_probe(struct i2c_client *client, return -ENODEV; /* Power GPIO pin */ - data->gpio_power = gpiod_get_optional(dev, "power", GPIOD_OUT_LOW); + data->gpio_power = devm_gpiod_get_optional(dev, "power", GPIOD_OUT_LOW); if (IS_ERR(data->gpio_power)) { if (PTR_ERR(data->gpio_power) != -EPROBE_DEFER) dev_err(dev, "Shutdown GPIO request failed\n"); -- cgit v0.10.2 From fae16989be77b09bab86c79233e4b511ea769cea Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 22 Aug 2016 13:25:56 -0700 Subject: Input: tegra-kbc - fix inverted reset logic Commit fe6b0dfaba68 ("Input: tegra-kbc - use reset framework") accidentally converted _deassert to _assert, so there is no code to wake up this hardware. Fixes: fe6b0dfaba68 ("Input: tegra-kbc - use reset framework") Signed-off-by: Masahiro Yamada Acked-by: Thierry Reding Acked-by: Laxman Dewangan Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c index 7d61439..0c07e10 100644 --- a/drivers/input/keyboard/tegra-kbc.c +++ b/drivers/input/keyboard/tegra-kbc.c @@ -376,7 +376,7 @@ static int tegra_kbc_start(struct tegra_kbc *kbc) /* Reset the KBC controller to clear all previous status.*/ reset_control_assert(kbc->rst); udelay(100); - reset_control_assert(kbc->rst); + reset_control_deassert(kbc->rst); udelay(100); tegra_kbc_config_pins(kbc); -- cgit v0.10.2 From 3e29d6bb6433ebfa4e187b1164b80baf720d58c3 Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Mon, 22 Aug 2016 11:28:11 -0700 Subject: Input: synaptics-rmi4 - fix register descriptor subpacket map construction The map_offset variable is specific to the register and needs to be reset in the loop. Otherwise, subsequent register's subpacket maps will have their bits set at the wrong index. Signed-off-by: Andrew Duggan Tested-by: Nitin Chaudhary Reviewed-by: Benjamin Tissoires Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index faa295e..c83bce8 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -553,7 +553,6 @@ int rmi_read_register_desc(struct rmi_device *d, u16 addr, goto free_struct_buff; reg = find_first_bit(rdesc->presense_map, RMI_REG_DESC_PRESENSE_BITS); - map_offset = 0; for (i = 0; i < rdesc->num_registers; i++) { struct rmi_register_desc_item *item = &rdesc->registers[i]; int reg_size = struct_buf[offset]; @@ -576,6 +575,8 @@ int rmi_read_register_desc(struct rmi_device *d, u16 addr, item->reg = reg; item->reg_size = reg_size; + map_offset = 0; + do { for (b = 0; b < 7; b++) { if (struct_buf[offset] & (0x1 << b)) -- cgit v0.10.2 From e3a888a4bff0bef0b256d55c58bc32c99fb44ece Mon Sep 17 00:00:00 2001 From: Petr Cvek Date: Fri, 19 Aug 2016 10:14:29 -0700 Subject: Input: ads7846 - remove redundant regulator_disable call ADS7846 regulator is disabled twice in a row in ads7846_remove(). Valid one is in ads7846_disable(). Removing the ads7846 module causes warning about unbalanced disables. ... WARNING: CPU: 0 PID: 29269 at drivers/regulator/core.c:2251 _regulator_disable+0xf8/0x130 unbalanced disables for vads7846 CPU: 0 PID: 29269 Comm: rmmod Tainted: G D W 4.7.0+ #3 Hardware name: HTC Magician ... show_stack+0x10/0x14 __warn+0xd8/0x100 warn_slowpath_fmt+0x38/0x48 _regulator_disable+0xf8/0x130 regulator_disable+0x34/0x60 ads7846_remove+0x58/0xd4 [ads7846] spi_drv_remove+0x1c/0x34 __device_release_driver+0x84/0x114 driver_detach+0x8c/0x90 bus_remove_driver+0x5c/0xc8 SyS_delete_module+0x1a0/0x238 ret_fast_syscall+0x0/0x38 Signed-off-by: Petr Cvek Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index a61b215..1ce3ecb 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -1473,7 +1473,6 @@ static int ads7846_remove(struct spi_device *spi) ads784x_hwmon_unregister(spi, ts); - regulator_disable(ts->reg); regulator_put(ts->reg); if (!ts->get_pendown_state) { -- cgit v0.10.2 From b47b0cc73032d3c8225b5ea9a077941632f16d91 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:32:38 +0200 Subject: include/uapi/linux/if_pppox.h: include linux/if.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compilation error: error: ‘IFNAMSIZ’ undeclared here (not in a function) Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h index e128769..473c3c4 100644 --- a/include/uapi/linux/if_pppox.h +++ b/include/uapi/linux/if_pppox.h @@ -21,6 +21,7 @@ #include #include +#include #include #include -- cgit v0.10.2 From 1fe8e0f074c77aa41aaa579345a9e675acbebfa9 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:32:39 +0200 Subject: include/uapi/linux/if_tunnel.h: include linux/if.h, linux/ip.h and linux/in6.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compilation errors like: error: field ‘iph’ has incomplete type error: field ‘prefix’ has incomplete type Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 1046f55..777b6cd 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -2,6 +2,9 @@ #define _UAPI_IF_TUNNEL_H_ #include +#include +#include +#include #include -- cgit v0.10.2 From 05ee5de7451796cf9a8aeb2f05a57790d4fd2336 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:32:42 +0200 Subject: include/uapi/linux/if_pppol2tp.h: include linux/in.h and linux/in6.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compilation errors like: error: field ‘addr’ has incomplete type struct sockaddr_in addr; /* IP address and port to send to */ ^ error: field ‘addr’ has incomplete type struct sockaddr_in6 addr; /* IP address and port to send to */ Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/if_pppol2tp.h b/include/uapi/linux/if_pppol2tp.h index 163e8ad..4bd1f55 100644 --- a/include/uapi/linux/if_pppol2tp.h +++ b/include/uapi/linux/if_pppol2tp.h @@ -16,7 +16,8 @@ #define _UAPI__LINUX_IF_PPPOL2TP_H #include - +#include +#include /* Structure used to connect() the socket to a particular tunnel UDP * socket over IPv4. -- cgit v0.10.2 From eafe92114308acf14e45c6c3d154a5dad5523d1a Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:32:43 +0200 Subject: include/uapi/linux/if_pppox.h: include linux/in.h and linux/in6.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compilation errors: error: field ‘addr’ has incomplete type struct sockaddr_in addr; /* IP address and port to send to */ error: field ‘addr’ has incomplete type struct sockaddr_in6 addr; /* IP address and port to send to */ Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h index 473c3c4..d37bbb1 100644 --- a/include/uapi/linux/if_pppox.h +++ b/include/uapi/linux/if_pppox.h @@ -24,6 +24,8 @@ #include #include #include +#include +#include /* For user-space programs to pick up these definitions * which they wouldn't get otherwise without defining __KERNEL__ -- cgit v0.10.2 From e6571aa5cb65ff52a87843652d0d8120a48aae7c Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:32:55 +0200 Subject: include/uapi/linux/openvswitch.h: use __u32 from linux/types.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compiler error: error: unknown type name ‘uint32_t’ Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index d95a301..645499a 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -632,8 +632,8 @@ enum ovs_hash_alg { * @hash_basis: basis used for computing hash. */ struct ovs_action_hash { - uint32_t hash_alg; /* One of ovs_hash_alg. */ - uint32_t hash_basis; + __u32 hash_alg; /* One of ovs_hash_alg. */ + __u32 hash_basis; }; /** -- cgit v0.10.2 From cf00713a655d3019be7faa184402f16c43a0fed3 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:32:58 +0200 Subject: include/uapi/linux/atm_zatm.h: include linux/time.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compile error: error: field ‘real’ has incomplete type struct timeval real; /* real (wall-clock) time */ Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h index 9c9c6ad..5cd4d4d 100644 --- a/include/uapi/linux/atm_zatm.h +++ b/include/uapi/linux/atm_zatm.h @@ -14,6 +14,7 @@ #include #include +#include #define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc) /* get pool statistics */ -- cgit v0.10.2 From a1d1f65ff5ac27276a585b41a619d30995bb92fe Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:33:19 +0200 Subject: include/uapi/linux/openvswitch.h: use __u32 from linux/types.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kernel uapi header are supposed to use them. Fixes userspace compile error: linux/openvswitch.h:583:2: error: unknown type name ‘uint32_t’ Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 645499a..54c3b4f 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -583,7 +583,7 @@ enum ovs_userspace_attr { #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) struct ovs_action_trunc { - uint32_t max_len; /* Max packet size in bytes. */ + __u32 max_len; /* Max packet size in bytes. */ }; /** -- cgit v0.10.2 From 53dc65d4d33c422d086c9d9ad8c03ab400ffc0a1 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:33:21 +0200 Subject: include/uapi/linux/ipx.h: fix conflicting defitions with glibc netipx/ipx.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes these compiler warnings via libc-compat.h when glibc netipx/ipx.h is included before linux/ipx.h: ./linux/ipx.h:9:8: error: redefinition of ‘struct sockaddr_ipx’ ./linux/ipx.h:26:8: error: redefinition of ‘struct ipx_route_definition’ ./linux/ipx.h:32:8: error: redefinition of ‘struct ipx_interface_definition’ ./linux/ipx.h:49:8: error: redefinition of ‘struct ipx_config_data’ ./linux/ipx.h:58:8: error: redefinition of ‘struct ipx_route_def’ Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/ipx.h b/include/uapi/linux/ipx.h index 3d48014..30f031d 100644 --- a/include/uapi/linux/ipx.h +++ b/include/uapi/linux/ipx.h @@ -1,11 +1,13 @@ #ifndef _IPX_H_ #define _IPX_H_ +#include /* for compatibility with glibc netipx/ipx.h */ #include #include #include #define IPX_NODE_LEN 6 #define IPX_MTU 576 +#if __UAPI_DEF_SOCKADDR_IPX struct sockaddr_ipx { __kernel_sa_family_t sipx_family; __be16 sipx_port; @@ -14,6 +16,7 @@ struct sockaddr_ipx { __u8 sipx_type; unsigned char sipx_zero; /* 16 byte fill */ }; +#endif /* __UAPI_DEF_SOCKADDR_IPX */ /* * So we can fit the extra info for SIOCSIFADDR into the address nicely @@ -23,12 +26,15 @@ struct sockaddr_ipx { #define IPX_DLTITF 0 #define IPX_CRTITF 1 +#if __UAPI_DEF_IPX_ROUTE_DEFINITION struct ipx_route_definition { __be32 ipx_network; __be32 ipx_router_network; unsigned char ipx_router_node[IPX_NODE_LEN]; }; +#endif /* __UAPI_DEF_IPX_ROUTE_DEFINITION */ +#if __UAPI_DEF_IPX_INTERFACE_DEFINITION struct ipx_interface_definition { __be32 ipx_network; unsigned char ipx_device[16]; @@ -45,16 +51,20 @@ struct ipx_interface_definition { #define IPX_INTERNAL 2 unsigned char ipx_node[IPX_NODE_LEN]; }; - +#endif /* __UAPI_DEF_IPX_INTERFACE_DEFINITION */ + +#if __UAPI_DEF_IPX_CONFIG_DATA struct ipx_config_data { unsigned char ipxcfg_auto_select_primary; unsigned char ipxcfg_auto_create_interfaces; }; +#endif /* __UAPI_DEF_IPX_CONFIG_DATA */ /* * OLD Route Definition for backward compatibility. */ +#if __UAPI_DEF_IPX_ROUTE_DEF struct ipx_route_def { __be32 ipx_network; __be32 ipx_router_network; @@ -67,6 +77,7 @@ struct ipx_route_def { #define IPX_RT_BLUEBOOK 2 #define IPX_RT_ROUTED 1 }; +#endif /* __UAPI_DEF_IPX_ROUTE_DEF */ #define SIOCAIPXITFCRT (SIOCPROTOPRIVATE) #define SIOCAIPXPRISLT (SIOCPROTOPRIVATE + 1) diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index e4f048e..44b8a6b 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -139,6 +139,25 @@ #endif /* _NETINET_IN_H */ +/* Coordinate with glibc netipx/ipx.h header. */ +#if defined(__NETIPX_IPX_H) + +#define __UAPI_DEF_SOCKADDR_IPX 0 +#define __UAPI_DEF_IPX_ROUTE_DEFINITION 0 +#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 0 +#define __UAPI_DEF_IPX_CONFIG_DATA 0 +#define __UAPI_DEF_IPX_ROUTE_DEF 0 + +#else /* defined(__NETIPX_IPX_H) */ + +#define __UAPI_DEF_SOCKADDR_IPX 1 +#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 +#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 +#define __UAPI_DEF_IPX_CONFIG_DATA 1 +#define __UAPI_DEF_IPX_ROUTE_DEF 1 + +#endif /* defined(__NETIPX_IPX_H) */ + /* Definitions for xattr.h */ #if defined(_SYS_XATTR_H) #define __UAPI_DEF_XATTR 0 @@ -179,6 +198,13 @@ #define __UAPI_DEF_IN6_PKTINFO 1 #define __UAPI_DEF_IP6_MTUINFO 1 +/* Definitions for ipx.h */ +#define __UAPI_DEF_SOCKADDR_IPX 1 +#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 +#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 +#define __UAPI_DEF_IPX_CONFIG_DATA 1 +#define __UAPI_DEF_IPX_ROUTE_DEF 1 + /* Definitions for xattr.h */ #define __UAPI_DEF_XATTR 1 -- cgit v0.10.2 From 85b51b12115c79cce7ea1ced6c0bd0339a165d3f Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Thu, 18 Aug 2016 14:39:40 +0100 Subject: net: ipv6: Remove addresses for failures with strict DAD If DAD fails with accept_dad set to 2, global addresses and host routes are incorrectly left in place. Even though disable_ipv6 is set, contrary to documentation, the addresses are not dynamically deleted from the interface. It is only on a subsequent link down/up that these are removed. The fix is not only to set the disable_ipv6 flag, but also to call addrconf_ifdown(), which is the action to carry out when disabling IPv6. This results in the addresses and routes being deleted immediately. The DAD failure for the LL addr is determined as before via netlink, or by the absence of the LL addr (which also previously would have had to be checked for in case of an intervening link down and up). As the call to addrconf_ifdown() requires an rtnl lock, the logic to disable IPv6 when DAD fails is moved to addrconf_dad_work(). Previous behavior: root@vm1:/# sysctl net.ipv6.conf.eth3.accept_dad=2 net.ipv6.conf.eth3.accept_dad = 2 root@vm1:/# ip -6 addr add 2000::10/64 dev eth3 root@vm1:/# ip link set up eth3 root@vm1:/# ip -6 addr show dev eth3 5: eth3: mtu 1500 qlen 1000 inet6 2000::10/64 scope global valid_lft forever preferred_lft forever inet6 fe80::5054:ff:fe43:dd5a/64 scope link tentative dadfailed valid_lft forever preferred_lft forever root@vm1:/# ip -6 route show dev eth3 2000::/64 proto kernel metric 256 fe80::/64 proto kernel metric 256 root@vm1:/# ip link set down eth3 root@vm1:/# ip link set up eth3 root@vm1:/# ip -6 addr show dev eth3 root@vm1:/# ip -6 route show dev eth3 root@vm1:/# New behavior: root@vm1:/# sysctl net.ipv6.conf.eth3.accept_dad=2 net.ipv6.conf.eth3.accept_dad = 2 root@vm1:/# ip -6 addr add 2000::10/64 dev eth3 root@vm1:/# ip link set up eth3 root@vm1:/# ip -6 addr show dev eth3 root@vm1:/# ip -6 route show dev eth3 root@vm1:/# Signed-off-by: Mike Manning Signed-off-by: David S. Miller diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index df8425f..f418d2e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1872,7 +1872,6 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp) void addrconf_dad_failure(struct inet6_ifaddr *ifp) { - struct in6_addr addr; struct inet6_dev *idev = ifp->idev; struct net *net = dev_net(ifp->idev->dev); @@ -1934,18 +1933,6 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) in6_ifa_put(ifp2); lock_errdad: spin_lock_bh(&ifp->lock); - } else if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { - addr.s6_addr32[0] = htonl(0xfe800000); - addr.s6_addr32[1] = 0; - - if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) && - ipv6_addr_equal(&ifp->addr, &addr)) { - /* DAD failed for link-local based on MAC address */ - idev->cnf.disable_ipv6 = 1; - - pr_info("%s: IPv6 being disabled!\n", - ifp->idev->dev->name); - } } errdad: @@ -3821,6 +3808,7 @@ static void addrconf_dad_work(struct work_struct *w) dad_work); struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; + bool disable_ipv6 = false; enum { DAD_PROCESS, @@ -3837,6 +3825,24 @@ static void addrconf_dad_work(struct work_struct *w) } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) { action = DAD_ABORT; ifp->state = INET6_IFADDR_STATE_POSTDAD; + + if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6 && + !(ifp->flags & IFA_F_STABLE_PRIVACY)) { + struct in6_addr addr; + + addr.s6_addr32[0] = htonl(0xfe800000); + addr.s6_addr32[1] = 0; + + if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) && + ipv6_addr_equal(&ifp->addr, &addr)) { + /* DAD failed for link-local based on MAC */ + idev->cnf.disable_ipv6 = 1; + + pr_info("%s: IPv6 being disabled!\n", + ifp->idev->dev->name); + disable_ipv6 = true; + } + } } spin_unlock_bh(&ifp->lock); @@ -3845,6 +3851,8 @@ static void addrconf_dad_work(struct work_struct *w) goto out; } else if (action == DAD_ABORT) { addrconf_dad_stop(ifp, 1); + if (disable_ipv6) + addrconf_ifdown(idev->dev, 0); goto out; } -- cgit v0.10.2 From c0451fe1f27b815b3f400df2a63b9aecf589b7b0 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Sun, 21 Aug 2016 11:22:32 +0300 Subject: net: ip_finish_output_gso: Allow fragmenting segments of tunneled skbs if their DF is unset In b8247f095e, "net: ip_finish_output_gso: If skb_gso_network_seglen exceeds MTU, allow segmentation for local udp tunneled skbs" gso skbs arriving from an ingress interface that go through UDP tunneling, are allowed to be fragmented if the resulting encapulated segments exceed the dst mtu of the egress interface. This aligned the behavior of gso skbs to non-gso skbs going through udp encapsulation path. However the non-gso vs gso anomaly is present also in the following cases of a GRE tunnel: - ip_gre in collect_md mode, where TUNNEL_DONT_FRAGMENT is not set (e.g. OvS vport-gre with df_default=false) - ip_gre in nopmtudisc mode, where IFLA_GRE_IGNORE_DF is set In both of the above cases, the non-gso skbs get fragmented, whereas the gso skbs (having skb_gso_network_seglen that exceeds dst mtu) get dropped, as they don't go through the segment+fragment code path. Fix: Setting IPSKB_FRAG_SEGS if the tunnel specified IP_DF bit is NOT set. Tunnels that do set IP_DF, will not go to fragmentation of segments. This preserves behavior of ip_gre in (the default) pmtudisc mode. Fixes: b8247f095e ("net: ip_finish_output_gso: If skb_gso_network_seglen exceeds MTU, allow segmentation for local udp tunneled skbs") Reported-by: wenxu Cc: Hannes Frederic Sowa Signed-off-by: Shmulik Ladkani Tested-by: wenxu Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 9d847c3..0f227db 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -73,9 +73,11 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - if (skb_iif && proto == IPPROTO_UDP) { - /* Arrived from an ingress interface and got udp encapuslated. - * The encapsulated network segment length may exceed dst mtu. + if (skb_iif && !(df & htons(IP_DF))) { + /* Arrived from an ingress interface, got encapsulated, with + * fragmentation of encapulating frames allowed. + * If skb is gso, the resulting encapsulated network segments + * may exceed dst mtu. * Allow IP Fragmentation of segments. */ IPCB(skb)->flags |= IPSKB_FRAG_SEGS; -- cgit v0.10.2 From 4870e704d901602e4ae5de462c4e65732cf2ed6c Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 22 Aug 2016 12:03:29 +0300 Subject: qed: FLR of active VFs might lead to FW assert Driver never bothered marking the VF's vport with the VF's sw_fid. As a result, FLR flows are not going to clean those vports. If the vport was active when FLRed, re-activating it would lead to a FW assertion. Fixes: dacd88d6f6851 ("qed: IOV l2 functionality") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 35e5377..45ab746 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -561,9 +561,18 @@ struct qed_dev { static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev, u32 concrete_fid) { + u8 vfid = GET_FIELD(concrete_fid, PXP_CONCRETE_FID_VFID); u8 pfid = GET_FIELD(concrete_fid, PXP_CONCRETE_FID_PFID); + u8 vf_valid = GET_FIELD(concrete_fid, + PXP_CONCRETE_FID_VFVALID); + u8 sw_fid; - return pfid; + if (vf_valid) + sw_fid = vfid + MAX_NUM_PFS; + else + sw_fid = pfid; + + return sw_fid; } #define PURE_LB_TC 8 -- cgit v0.10.2 From 7329a655875a2f4bd6984fe8a7e00a6981e802f3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 19 Aug 2016 12:15:22 -0700 Subject: usercopy: avoid potentially undefined behavior in pointer math check_bogus_address() checked for pointer overflow using this expression, where 'ptr' has type 'const void *': ptr + n < ptr Since pointer wraparound is undefined behavior, gcc at -O2 by default treats it like the following, which would not behave as intended: (long)n < 0 Fortunately, this doesn't currently happen for kernel code because kernel code is compiled with -fno-strict-overflow. But the expression should be fixed anyway to use well-defined integer arithmetic, since it could be treated differently by different compilers in the future or could be reported by tools checking for undefined behavior. Signed-off-by: Eric Biggers Signed-off-by: Kees Cook diff --git a/mm/usercopy.c b/mm/usercopy.c index 8ebae91..82f81df 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -124,7 +124,7 @@ static inline const char *check_kernel_text_object(const void *ptr, static inline const char *check_bogus_address(const void *ptr, unsigned long n) { /* Reject if object wraps past end of memory. */ - if (ptr + n < ptr) + if ((unsigned long)ptr + n < (unsigned long)ptr) return ""; /* Reject if NULL or ZERO-allocation. */ -- cgit v0.10.2 From 94cd97af690dd9537818dc9841d0ec68bb1dd877 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 22 Aug 2016 11:53:59 -0500 Subject: usercopy: fix overlap check for kernel text When running with a local patch which moves the '_stext' symbol to the very beginning of the kernel text area, I got the following panic with CONFIG_HARDENED_USERCOPY: usercopy: kernel memory exposure attempt detected from ffff88103dfff000 () (4096 bytes) ------------[ cut here ]------------ kernel BUG at mm/usercopy.c:79! invalid opcode: 0000 [#1] SMP ... CPU: 0 PID: 4800 Comm: cp Not tainted 4.8.0-rc3.after+ #1 Hardware name: Dell Inc. PowerEdge R720/0X3D66, BIOS 2.5.4 01/22/2016 task: ffff880817444140 task.stack: ffff880816274000 RIP: 0010:[] __check_object_size+0x76/0x413 RSP: 0018:ffff880816277c40 EFLAGS: 00010246 RAX: 000000000000006b RBX: ffff88103dfff000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff88081f80dfa8 RDI: ffff88081f80dfa8 RBP: ffff880816277c90 R08: 000000000000054c R09: 0000000000000000 R10: 0000000000000005 R11: 0000000000000006 R12: 0000000000001000 R13: ffff88103e000000 R14: ffff88103dffffff R15: 0000000000000001 FS: 00007fb9d1750800(0000) GS:ffff88081f800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000021d2000 CR3: 000000081a08f000 CR4: 00000000001406f0 Stack: ffff880816277cc8 0000000000010000 000000043de07000 0000000000000000 0000000000001000 ffff880816277e60 0000000000001000 ffff880816277e28 000000000000c000 0000000000001000 ffff880816277ce8 ffffffff8136c3a6 Call Trace: [] copy_page_to_iter_iovec+0xa6/0x1c0 [] copy_page_to_iter+0x16/0x90 [] generic_file_read_iter+0x3e3/0x7c0 [] ? xfs_file_buffered_aio_write+0xad/0x260 [xfs] [] ? down_read+0x12/0x40 [] xfs_file_buffered_aio_read+0x51/0xc0 [xfs] [] xfs_file_read_iter+0x62/0xb0 [xfs] [] __vfs_read+0xdf/0x130 [] vfs_read+0x8e/0x140 [] SyS_read+0x55/0xc0 [] do_syscall_64+0x67/0x160 [] entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:[<00007fb9d0c33c00>] 0x7fb9d0c33c00 RSP: 002b:00007ffc9c262f28 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: fffffffffff8ffff RCX: 00007fb9d0c33c00 RDX: 0000000000010000 RSI: 00000000021c3000 RDI: 0000000000000004 RBP: 00000000021c3000 R08: 0000000000000000 R09: 00007ffc9c264d6c R10: 00007ffc9c262c50 R11: 0000000000000246 R12: 0000000000010000 R13: 00007ffc9c2630b0 R14: 0000000000000004 R15: 0000000000010000 Code: 81 48 0f 44 d0 48 c7 c6 90 4d a3 81 48 c7 c0 bb b3 a2 81 48 0f 44 f0 4d 89 e1 48 89 d9 48 c7 c7 68 16 a3 81 31 c0 e8 f4 57 f7 ff <0f> 0b 48 8d 90 00 40 00 00 48 39 d3 0f 83 22 01 00 00 48 39 c3 RIP [] __check_object_size+0x76/0x413 RSP The checked object's range [ffff88103dfff000, ffff88103e000000) is valid, so there shouldn't have been a BUG. The hardened usercopy code got confused because the range's ending address is the same as the kernel's text starting address at 0xffff88103e000000. The overlap check is slightly off. Fixes: f5509cc18daa ("mm: Hardened usercopy") Signed-off-by: Josh Poimboeuf Signed-off-by: Kees Cook diff --git a/mm/usercopy.c b/mm/usercopy.c index 82f81df..a3cc305 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -83,7 +83,7 @@ static bool overlaps(const void *ptr, unsigned long n, unsigned long low, unsigned long check_high = check_low + n; /* Does not overlap if entirely above or entirely below. */ - if (check_low >= high || check_high < low) + if (check_low >= high || check_high <= low) return false; return true; -- cgit v0.10.2 From 28a10c426e81afc88514bca8e73affccf850fdf6 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 22 Aug 2016 07:10:20 -0400 Subject: net sched: fix encoding to use real length Encoding of the metadata was using the padded length as opposed to the real length of the data which is a bug per specification. This has not been an issue todate because all metadatum specified so far has been 32 bit where aligned and data length are the same width. This also includes a bug fix for validating the length of a u16 field. But since there is no metadata of size u16 yes we are fine to include it here. While at it get rid of magic numbers. Fixes: ef6980b6becb ("net sched: introduce IFE action") Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 141a06e..e87cd81 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -53,7 +53,7 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) u32 *tlv = (u32 *)(skbdata); u16 totlen = nla_total_size(dlen); /*alignment + hdr */ char *dptr = (char *)tlv + NLA_HDRLEN; - u32 htlv = attrtype << 16 | totlen; + u32 htlv = attrtype << 16 | dlen; *tlv = htonl(htlv); memset(dptr, 0, totlen - NLA_HDRLEN); @@ -135,7 +135,7 @@ EXPORT_SYMBOL_GPL(ife_release_meta_gen); int ife_validate_meta_u32(void *val, int len) { - if (len == 4) + if (len == sizeof(u32)) return 0; return -EINVAL; @@ -144,8 +144,8 @@ EXPORT_SYMBOL_GPL(ife_validate_meta_u32); int ife_validate_meta_u16(void *val, int len) { - /* length will include padding */ - if (len == NLA_ALIGN(2)) + /* length will not include padding */ + if (len == sizeof(u16)) return 0; return -EINVAL; @@ -652,12 +652,14 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, u8 *tlvdata = (u8 *)tlv; u16 mtype = tlv->type; u16 mlen = tlv->len; + u16 alen; mtype = ntohs(mtype); mlen = ntohs(mlen); + alen = NLA_ALIGN(mlen); - if (find_decode_metaid(skb, ife, mtype, (mlen - 4), - (void *)(tlvdata + 4))) { + if (find_decode_metaid(skb, ife, mtype, (mlen - NLA_HDRLEN), + (void *)(tlvdata + NLA_HDRLEN))) { /* abuse overlimits to count when we receive metadata * but dont have an ops for it */ @@ -666,8 +668,8 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, ife->tcf_qstats.overlimits++; } - tlvdata += mlen; - ifehdrln -= mlen; + tlvdata += alen; + ifehdrln -= alen; tlv = (struct meta_tlvhdr *)tlvdata; } -- cgit v0.10.2 From 6c73358c83ce870c0cf32413e5cadb3b9a39c606 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 22 Aug 2016 16:58:53 -0400 Subject: USB: fix typo in wMaxPacketSize validation The maximum value allowed for wMaxPacketSize of a high-speed interrupt endpoint is 1024 bytes, not 1023. Signed-off-by: Alan Stern Fixes: aed9d65ac327 ("USB: validate wMaxPacketValue entries in endpoint descriptors") CC: Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 0511631..15ce4ab 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -187,7 +187,7 @@ static const unsigned short high_speed_maxpacket_maxes[4] = { [USB_ENDPOINT_XFER_CONTROL] = 64, [USB_ENDPOINT_XFER_ISOC] = 1024, [USB_ENDPOINT_XFER_BULK] = 512, - [USB_ENDPOINT_XFER_INT] = 1023, + [USB_ENDPOINT_XFER_INT] = 1024, }; static const unsigned short super_speed_maxpacket_maxes[4] = { [USB_ENDPOINT_XFER_CONTROL] = 512, -- cgit v0.10.2 From a8719670687c46ed2e904c0d05fa4cd7e4950cd1 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 23 Aug 2016 10:27:19 +0300 Subject: ASoC: omap-mcpdm: Fix irq resource handling Fixes: ddd17531ad908 ("ASoC: omap-mcpdm: Clean up with devm_* function") Managed irq request will not doing any good in ASoC probe level as it is not going to free up the irq when the driver is unbound from the sound card. Signed-off-by: Peter Ujfalusi Reported-by: Russell King Signed-off-by: Mark Brown diff --git a/sound/soc/omap/omap-mcpdm.c b/sound/soc/omap/omap-mcpdm.c index 74d6e6f..64609c7 100644 --- a/sound/soc/omap/omap-mcpdm.c +++ b/sound/soc/omap/omap-mcpdm.c @@ -394,8 +394,8 @@ static int omap_mcpdm_probe(struct snd_soc_dai *dai) pm_runtime_get_sync(mcpdm->dev); omap_mcpdm_write(mcpdm, MCPDM_REG_CTRL, 0x00); - ret = devm_request_irq(mcpdm->dev, mcpdm->irq, omap_mcpdm_irq_handler, - 0, "McPDM", (void *)mcpdm); + ret = request_irq(mcpdm->irq, omap_mcpdm_irq_handler, 0, "McPDM", + (void *)mcpdm); pm_runtime_put_sync(mcpdm->dev); @@ -420,6 +420,7 @@ static int omap_mcpdm_remove(struct snd_soc_dai *dai) { struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai); + free_irq(mcpdm->irq, (void *)mcpdm); pm_runtime_disable(mcpdm->dev); return 0; -- cgit v0.10.2 From a77ec83a57890240c546df00ca5df1cdeedb1cc3 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Mon, 6 Jun 2016 18:07:59 -0400 Subject: vhost/scsi: fix reuse of &vq->iov[out] in response The address of the iovec &vq->iov[out] is not guaranteed to contain the scsi command's response iovec throughout the lifetime of the command. Rather, it is more likely to contain an iovec from an immediately following command after looping back around to vhost_get_vq_desc(). Pass along the iovec entirely instead. Fixes: 79c14141a487 ("vhost/scsi: Convert completion path to use copy_to_iter") Cc: stable@vger.kernel.org Signed-off-by: Benjamin Coddington Signed-off-by: Michael S. Tsirkin diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 9d6320e..6e29d05 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -88,7 +88,7 @@ struct vhost_scsi_cmd { struct scatterlist *tvc_prot_sgl; struct page **tvc_upages; /* Pointer to response header iovec */ - struct iovec *tvc_resp_iov; + struct iovec tvc_resp_iov; /* Pointer to vhost_scsi for our device */ struct vhost_scsi *tvc_vhost; /* Pointer to vhost_virtqueue for the cmd */ @@ -547,7 +547,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) memcpy(v_rsp.sense, cmd->tvc_sense_buf, se_cmd->scsi_sense_length); - iov_iter_init(&iov_iter, READ, cmd->tvc_resp_iov, + iov_iter_init(&iov_iter, READ, &cmd->tvc_resp_iov, cmd->tvc_in_iovs, sizeof(v_rsp)); ret = copy_to_iter(&v_rsp, sizeof(v_rsp), &iov_iter); if (likely(ret == sizeof(v_rsp))) { @@ -1044,7 +1044,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) } cmd->tvc_vhost = vs; cmd->tvc_vq = vq; - cmd->tvc_resp_iov = &vq->iov[out]; + cmd->tvc_resp_iov = vq->iov[out]; cmd->tvc_in_iovs = in; pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n", -- cgit v0.10.2 From b88fa69eaa8649f11828158c7b65c4bcd886ebd5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Aug 2016 11:19:33 -0400 Subject: pNFS: The client must not do I/O to the DS if it's lease has expired Ensure that the client conforms to the normative behaviour described in RFC5661 Section 12.7.2: "If a client believes its lease has expired, it MUST NOT send I/O to the storage device until it has validated its lease." So ensure that we wait for the lease to be validated before using the layout. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v3.20+ diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index bf98f1b..6daf034 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1555,6 +1555,7 @@ pnfs_update_layout(struct inode *ino, } lookup_again: + nfs4_client_recover_expired_lease(clp); first = false; spin_lock(&ino->i_lock); lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); -- cgit v0.10.2 From 41963c10c47a35185e68cb9049f7a3493c94d2d7 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Mon, 22 Aug 2016 14:11:16 -0400 Subject: pnfs/blocklayout: update last_write_offset atomically with extents Block/SCSI layout write completion may add committable extents to the extent tree before updating the layout's last-written byte under the inode lock. If a sync happens before this value is updated, then prepare_layoutcommit may find and encode these extents which would produce a LAYOUTCOMMIT request whose encoded extents are larger than the request's loca_length. Fix this by using a last-written byte value that is updated atomically with the extent tree so that commitable extents always match. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index f55a4e7..2178476 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -346,7 +346,7 @@ static void bl_write_cleanup(struct work_struct *work) PAGE_SIZE - 1) & (loff_t)PAGE_MASK; ext_tree_mark_written(bl, start >> SECTOR_SHIFT, - (end - start) >> SECTOR_SHIFT); + (end - start) >> SECTOR_SHIFT, end); } pnfs_ld_write_done(hdr); diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 18e6fd0..efc007f 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -141,6 +141,7 @@ struct pnfs_block_layout { struct rb_root bl_ext_ro; spinlock_t bl_ext_lock; /* Protects list manipulation */ bool bl_scsi_layout; + u64 bl_lwb; }; static inline struct pnfs_block_layout * @@ -182,7 +183,7 @@ int ext_tree_insert(struct pnfs_block_layout *bl, int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start, sector_t end); int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, - sector_t len); + sector_t len, u64 lwb); bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect, struct pnfs_block_extent *ret, bool rw); int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg); diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index 992bcb1..c85fbfd 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c @@ -402,7 +402,7 @@ ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be, int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, - sector_t len) + sector_t len, u64 lwb) { struct rb_root *root = &bl->bl_ext_rw; sector_t end = start + len; @@ -471,6 +471,8 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, } } out: + if (bl->bl_lwb < lwb) + bl->bl_lwb = lwb; spin_unlock(&bl->bl_ext_lock); __ext_put_deviceids(&tmp); @@ -518,7 +520,7 @@ static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p) } static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, - size_t buffer_size, size_t *count) + size_t buffer_size, size_t *count, __u64 *lastbyte) { struct pnfs_block_extent *be; int ret = 0; @@ -542,6 +544,8 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, p = encode_block_extent(be, p); be->be_tag = EXTENT_COMMITTING; } + *lastbyte = bl->bl_lwb - 1; + bl->bl_lwb = 0; spin_unlock(&bl->bl_ext_lock); return ret; @@ -564,7 +568,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) arg->layoutupdate_pages = &arg->layoutupdate_page; retry: - ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count); + ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten); if (unlikely(ret)) { ext_tree_free_commitdata(arg, buffer_size); -- cgit v0.10.2 From 48ef5865d08fa0a36d786f2f8e12c6194d27538b Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 19 Aug 2016 08:50:23 +0200 Subject: IB/qib: Use memdup_user() rather than duplicating its implementation Reuse existing functionality from memdup_user() instead of keeping duplicate source code. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index fcdf3791..c3edc03 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -328,26 +328,12 @@ static ssize_t flash_write(struct file *file, const char __user *buf, pos = *ppos; - if (pos != 0) { - ret = -EINVAL; - goto bail; - } - - if (count != sizeof(struct qib_flash)) { - ret = -EINVAL; - goto bail; - } - - tmp = kmalloc(count, GFP_KERNEL); - if (!tmp) { - ret = -ENOMEM; - goto bail; - } + if (pos != 0 || count != sizeof(struct qib_flash)) + return -EINVAL; - if (copy_from_user(tmp, buf, count)) { - ret = -EFAULT; - goto bail_tmp; - } + tmp = memdup_user(buf, count); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); dd = private2dd(file); if (qib_eeprom_write(dd, pos, tmp, count)) { @@ -361,8 +347,6 @@ static ssize_t flash_write(struct file *file, const char __user *buf, bail_tmp: kfree(tmp); - -bail: return ret; } -- cgit v0.10.2 From 92d27ae6b3bb3491c1685fb3ca7ae1b26d81bdf4 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 22 Aug 2016 18:23:24 +0200 Subject: IB/core: Use memdup_user() rather than duplicating its implementation * Reuse existing functionality from memdup_user() instead of keeping duplicate source code. This issue was detected by using the Coccinelle software. * The local variable "ret" will be set to an appropriate value a bit later. Thus omit the explicit initialisation at the beginning. Signed-off-by: Markus Elfring Signed-off-by: Doug Ledford diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8e90dd2..e1f9673 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2115,22 +2115,17 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata, size_t len) { const void __user *p = udata->inbuf + offset; - bool ret = false; + bool ret; u8 *buf; if (len > USHRT_MAX) return false; - buf = kmalloc(len, GFP_KERNEL); - if (!buf) + buf = memdup_user(p, len); + if (IS_ERR(buf)) return false; - if (copy_from_user(buf, p, len)) - goto free; - ret = !memchr_inv(buf, 0, len); - -free: kfree(buf); return ret; } -- cgit v0.10.2 From 6c7d46fdb8165ece4b0a17fb8f0b9320dbfeffc2 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 22 Aug 2016 18:09:14 -0500 Subject: i40iw: Change mem_resources pointer to a u8 iwdev->mem_resources is incorrectly defined as an unsigned long instead of u8. As a result, the offset into the dynamic allocated structures in i40iw_initialize_hw_resources() is incorrectly calculated and would lead to writing of memory regions outside of the allocated buffer. Fixes: 8e06af711bf2 ("i40iw: add main, hdr, status") Reported-by: Stefan Assmann Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index b738acd..882f3ef 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -232,7 +232,7 @@ struct i40iw_device { struct i40e_client *client; struct i40iw_hw hw; struct i40iw_cm_core cm_core; - unsigned long *mem_resources; + u8 *mem_resources; unsigned long *allocated_qps; unsigned long *allocated_cqs; unsigned long *allocated_mrs; -- cgit v0.10.2 From 44856be3e95c87f03e850ef4fdf8c0503c2dde18 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 22 Aug 2016 18:15:58 -0500 Subject: i40iw: Protect req_resource_num update In i40iw_alloc_resource(), ensure that the update to req_resource_num is protected by the lock. Fixes: 8e06af711bf2 ("i40iw: add main, hdr, status") Reported-by: Stefan Assmann Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 882f3ef..8ec09e4 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -435,8 +435,8 @@ static inline int i40iw_alloc_resource(struct i40iw_device *iwdev, *next = resource_num + 1; if (*next == max_resources) *next = 0; - spin_unlock_irqrestore(&iwdev->resource_lock, flags); *req_resource_num = resource_num; + spin_unlock_irqrestore(&iwdev->resource_lock, flags); return 0; } -- cgit v0.10.2 From faa739fb5df56aadab96bcd2f6eb3486cc3a3aec Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 22 Aug 2016 18:17:12 -0500 Subject: i40iw: Add missing check for interface already open In i40iw_open(), check if interface is already open and return success if it is. Fixes: 8e06af711bf2 ("i40iw: add main, hdr, status") Reported-by: Stefan Assmann Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 6e90813..0cbbe40 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1558,6 +1558,10 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) enum i40iw_status_code status; struct i40iw_handler *hdl; + hdl = i40iw_find_netdev(ldev->netdev); + if (hdl) + return 0; + hdl = kzalloc(sizeof(*hdl), GFP_KERNEL); if (!hdl) return -ENOMEM; -- cgit v0.10.2 From cff069b78c21559f427c3fefe9ef3294e3dec094 Mon Sep 17 00:00:00 2001 From: Bharat Potnuri Date: Tue, 23 Aug 2016 20:27:33 +0530 Subject: iw_cxgb4: Fix cxgb4 arm CQ logic w/IB_CQ_REPORT_MISSED_EVENTS Current cxgb4 arm CQ logic ignores IB_CQ_REPORT_MISSED_EVENTS for request completion notification on a CQ. Due to this ib_poll_handler() assumes all events polled and avoids further iopoll scheduling. This patch adds logic to cxgb4 ib_req_notify_cq() handler to check if CQ is not empty and return accordingly. Based on the return value of ib_req_notify_cq() handler, ib_poll_handler() will schedule a run of iopoll handler. Signed-off-by: Potnuri Bharat Teja Reviewed-by: Steve Wise Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 812ab72..ac926c9 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -1016,15 +1016,15 @@ int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct c4iw_cq *chp; - int ret; + int ret = 0; unsigned long flag; chp = to_c4iw_cq(ibcq); spin_lock_irqsave(&chp->lock, flag); - ret = t4_arm_cq(&chp->cq, - (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + t4_arm_cq(&chp->cq, + (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + if (flags & IB_CQ_REPORT_MISSED_EVENTS) + ret = t4_cq_notempty(&chp->cq); spin_unlock_irqrestore(&chp->lock, flag); - if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) - ret = 0; return ret; } diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 6126bbe..02173f4 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -634,6 +634,11 @@ static inline int t4_valid_cqe(struct t4_cq *cq, struct t4_cqe *cqe) return (CQE_GENBIT(cqe) == cq->gen); } +static inline int t4_cq_notempty(struct t4_cq *cq) +{ + return cq->sw_in_use || t4_valid_cqe(cq, &cq->queue[cq->cidx]); +} + static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) { int ret; -- cgit v0.10.2 From 543852af8e5902aee8f7c72c89e1513663e0f696 Mon Sep 17 00:00:00 2001 From: Caesar Wang Date: Wed, 27 Jul 2016 22:24:04 +0800 Subject: iio: adc: rockchip_saradc: reset saradc controller before programming it SARADC controller needs to be reset before programming it, otherwise it will not function properly. Signed-off-by: Caesar Wang Cc: Jonathan Cameron Cc: Heiko Stuebner Cc: Rob Herring Cc: linux-iio@vger.kernel.org Cc: linux-rockchip@lists.infradead.org Tested-by: Guenter Roeck Cc: Signed-off-by: Jonathan Cameron diff --git a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt index bf99e2f..205593f 100644 --- a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt +++ b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt @@ -16,6 +16,11 @@ Required properties: - vref-supply: The regulator supply ADC reference voltage. - #io-channel-cells: Should be 1, see ../iio-bindings.txt +Optional properties: +- resets: Must contain an entry for each entry in reset-names if need support + this option. See ../reset/reset.txt for details. +- reset-names: Must include the name "saradc-apb". + Example: saradc: saradc@2006c000 { compatible = "rockchip,saradc"; @@ -23,6 +28,8 @@ Example: interrupts = ; clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>; clock-names = "saradc", "apb_pclk"; + resets = <&cru SRST_SARADC>; + reset-names = "saradc-apb"; #io-channel-cells = <1>; vref-supply = <&vcc18>; }; diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 1de31bd..7675772 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -389,6 +389,7 @@ config QCOM_SPMI_VADC config ROCKCHIP_SARADC tristate "Rockchip SARADC driver" depends on ARCH_ROCKCHIP || (ARM && COMPILE_TEST) + depends on RESET_CONTROLLER help Say yes here to build support for the SARADC found in SoCs from Rockchip. diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c index f9ad6c2..85d7012 100644 --- a/drivers/iio/adc/rockchip_saradc.c +++ b/drivers/iio/adc/rockchip_saradc.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include @@ -53,6 +55,7 @@ struct rockchip_saradc { struct clk *clk; struct completion completion; struct regulator *vref; + struct reset_control *reset; const struct rockchip_saradc_data *data; u16 last_val; }; @@ -190,6 +193,16 @@ static const struct of_device_id rockchip_saradc_match[] = { }; MODULE_DEVICE_TABLE(of, rockchip_saradc_match); +/** + * Reset SARADC Controller. + */ +static void rockchip_saradc_reset_controller(struct reset_control *reset) +{ + reset_control_assert(reset); + usleep_range(10, 20); + reset_control_deassert(reset); +} + static int rockchip_saradc_probe(struct platform_device *pdev) { struct rockchip_saradc *info = NULL; @@ -218,6 +231,20 @@ static int rockchip_saradc_probe(struct platform_device *pdev) if (IS_ERR(info->regs)) return PTR_ERR(info->regs); + /* + * The reset should be an optional property, as it should work + * with old devicetrees as well + */ + info->reset = devm_reset_control_get(&pdev->dev, "saradc-apb"); + if (IS_ERR(info->reset)) { + ret = PTR_ERR(info->reset); + if (ret != -ENOENT) + return ret; + + dev_dbg(&pdev->dev, "no reset control found\n"); + info->reset = NULL; + } + init_completion(&info->completion); irq = platform_get_irq(pdev, 0); @@ -252,6 +279,9 @@ static int rockchip_saradc_probe(struct platform_device *pdev) return PTR_ERR(info->vref); } + if (info->reset) + rockchip_saradc_reset_controller(info->reset); + /* * Use a default value for the converter clock. * This may become user-configurable in the future. -- cgit v0.10.2 From 78ec79bfd59e126e1cb394302bfa531a420b3ecd Mon Sep 17 00:00:00 2001 From: Caesar Wang Date: Wed, 27 Jul 2016 22:24:06 +0800 Subject: arm64: dts: rockchip: add reset saradc node for rk3368 SoCs SARADC controller needs to be reset before programming it, otherwise it will not function properly. Signed-off-by: Caesar Wang Acked-by: Heiko Stuebner Cc: Signed-off-by: Jonathan Cameron diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi index d02a9003..4f44d11 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi @@ -270,6 +270,8 @@ #io-channel-cells = <1>; clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>; clock-names = "saradc", "apb_pclk"; + resets = <&cru SRST_SARADC>; + reset-names = "saradc-apb"; status = "disabled"; }; -- cgit v0.10.2 From 3d4267a5a3a4b7619b80ad1839d8b3bedd8b7a8d Mon Sep 17 00:00:00 2001 From: Caesar Wang Date: Wed, 27 Jul 2016 22:24:07 +0800 Subject: arm: dts: rockchip: add reset node for the exist saradc SoCs SARADC controller needs to be reset before programming it, otherwise it will not function properly. Signed-off-by: Caesar Wang Acked-by: Heiko Stuebner Cc: Signed-off-by: Jonathan Cameron diff --git a/arch/arm/boot/dts/rk3066a.dtsi b/arch/arm/boot/dts/rk3066a.dtsi index c0ba86c..0d0dae3 100644 --- a/arch/arm/boot/dts/rk3066a.dtsi +++ b/arch/arm/boot/dts/rk3066a.dtsi @@ -197,6 +197,8 @@ clock-names = "saradc", "apb_pclk"; interrupts = ; #io-channel-cells = <1>; + resets = <&cru SRST_SARADC>; + reset-names = "saradc-apb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index cd33f01..91c4b3c 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -279,6 +279,8 @@ #io-channel-cells = <1>; clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>; clock-names = "saradc", "apb_pclk"; + resets = <&cru SRST_SARADC>; + reset-names = "saradc-apb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi index 99bbcc2..e2cd683 100644 --- a/arch/arm/boot/dts/rk3xxx.dtsi +++ b/arch/arm/boot/dts/rk3xxx.dtsi @@ -399,6 +399,8 @@ #io-channel-cells = <1>; clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>; clock-names = "saradc", "apb_pclk"; + resets = <&cru SRST_SARADC>; + reset-names = "saradc-apb"; status = "disabled"; }; -- cgit v0.10.2 From 53e5f36fbd2453ad69a3369a1db62dc06c30a4aa Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 23 Aug 2016 15:32:51 -0400 Subject: USB: avoid left shift by -1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit UBSAN complains about a left shift by -1 in proc_do_submiturb(). This can occur when an URB is submitted for a bulk or control endpoint on a high-speed device, since the code doesn't bother to check the endpoint type; normally only interrupt or isochronous endpoints have a nonzero bInterval value. Aside from the fact that the operation is illegal, it shouldn't matter because the result isn't used. Still, in theory it could cause a hardware exception or other problem, so we should work around it. This patch avoids doing the left shift unless the shift amount is >= 0. The same piece of code has another problem. When checking the device speed (the exponential encoding for interrupt endpoints is used only by high-speed or faster devices), we need to look for speed >= USB_SPEED_SUPER as well as speed == USB_SPEED HIGH. The patch adds this check. Signed-off-by: Alan Stern Reported-by: Vittorio Zecca Tested-by: Vittorio Zecca Suggested-by: Bjørn Mork CC: Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index e6a6d67..09c8d9c 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1709,11 +1709,17 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb as->urb->start_frame = uurb->start_frame; as->urb->number_of_packets = number_of_packets; as->urb->stream_id = stream_id; - if (uurb->type == USBDEVFS_URB_TYPE_ISO || - ps->dev->speed == USB_SPEED_HIGH) - as->urb->interval = 1 << min(15, ep->desc.bInterval - 1); - else - as->urb->interval = ep->desc.bInterval; + + if (ep->desc.bInterval) { + if (uurb->type == USBDEVFS_URB_TYPE_ISO || + ps->dev->speed == USB_SPEED_HIGH || + ps->dev->speed >= USB_SPEED_SUPER) + as->urb->interval = 1 << + min(15, ep->desc.bInterval - 1); + else + as->urb->interval = ep->desc.bInterval; + } + as->urb->context = as; as->urb->complete = async_completed; for (totlen = u = 0; u < number_of_packets; u++) { -- cgit v0.10.2 From c0082e985fdf77b02fc9e0dac3b58504dcf11b7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20Stehl=C3=A9?= Date: Fri, 12 Aug 2016 15:26:30 +0200 Subject: ubifs: Fix assertion in layout_in_gaps() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An assertion in layout_in_gaps() verifies that the gap_lebs pointer is below the maximum bound. When computing this maximum bound the idx_lebs count is multiplied by sizeof(int), while C pointers arithmetic does take into account the size of the pointed elements implicitly already. Remove the multiplication to fix the assertion. Fixes: 1e51764a3c2ac05a ("UBIFS: add new flash file system") Cc: Signed-off-by: Vincent Stehlé Cc: Artem Bityutskiy Signed-off-by: Artem Bityutskiy Signed-off-by: Richard Weinberger diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index b45345d..51157da 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -370,7 +370,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) p = c->gap_lebs; do { - ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs); + ubifs_assert(p < c->gap_lebs + c->lst.idx_lebs); written = layout_leb_in_gaps(c, p); if (written < 0) { err = written; -- cgit v0.10.2 From 17ce1eb0b64eb27d4f9180daae7495fa022c7b0d Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 31 Jul 2016 21:42:23 +0200 Subject: ubifs: Fix xattr generic handler usage UBIFS uses full names to work with xattrs, therefore we have to use xattr_full_name() to obtain the xattr prefix as string. Cc: Cc: Andreas Gruenbacher Fixes: 2b88fc21ca ("ubifs: Switch to generic xattr handlers") Signed-off-by: Richard Weinberger Reviewed-by: Andreas Gruenbacher Tested-by: Dongsheng Yang diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index e237811..11a0041 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -575,7 +575,8 @@ static int ubifs_xattr_get(const struct xattr_handler *handler, dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name, inode->i_ino, dentry, size); - return __ubifs_getxattr(inode, name, buffer, size); + name = xattr_full_name(handler, name); + return __ubifs_getxattr(inode, name, buffer, size); } static int ubifs_xattr_set(const struct xattr_handler *handler, @@ -586,6 +587,8 @@ static int ubifs_xattr_set(const struct xattr_handler *handler, dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", name, inode->i_ino, dentry, size); + name = xattr_full_name(handler, name); + if (value) return __ubifs_setxattr(inode, name, value, size, flags); else -- cgit v0.10.2 From dad2232844073295c64e9cc2d734a0ade043e0f6 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 17 Aug 2016 18:10:11 +0300 Subject: um: Don't discard .text.exit section Commit e41f501d3912 ("vmlinux.lds: account for destructor sections") added '.text.exit' to EXIT_TEXT which is discarded at link time by default. This breaks compilation of UML: `.text.exit' referenced in section `.fini_array' of /usr/lib/gcc/x86_64-linux-gnu/6/../../../x86_64-linux-gnu/libc.a(sdlerror.o): defined in discarded section `.text.exit' of /usr/lib/gcc/x86_64-linux-gnu/6/../../../x86_64-linux-gnu/libc.a(sdlerror.o) Apparently UML doesn't want to discard exit text, so let's place all EXIT_TEXT sections in .exit.text. Fixes: e41f501d3912 ("vmlinux.lds: account for destructor sections") Reported-by: Stefan Traby Signed-off-by: Andrey Ryabinin Cc: Acked-by: Dmitry Vyukov Signed-off-by: Richard Weinberger diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index 1dd5bd8..1330553 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -81,7 +81,7 @@ .altinstr_replacement : { *(.altinstr_replacement) } /* .exit.text is discard at runtime, not link time, to deal with references from .altinstructions and .eh_frame */ - .exit.text : { *(.exit.text) } + .exit.text : { EXIT_TEXT } .exit.data : { *(.exit.data) } .preinit_array : { -- cgit v0.10.2 From 21c80c9fefc3db10b530a96eb0478c29eb28bf77 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 23 Aug 2016 16:36:42 -0500 Subject: x86/PCI: VMD: Fix infinite loop executing irq's We can't initialize the list head on deletion as this causes the node to point to itself, which causes an infinite loop if vmd_irq() happens to be servicing that node. The list initialization was trying to fix a bug from multiple calls to disable the same IRQ. Fix this instead by having the VMD driver track if the interrupt is enabled. [bhelgaas: changelog, add "Fixes"] Fixes: 97e923063575 ("x86/PCI: VMD: Initialize list item in IRQ disable") Reported-by: Grzegorz Koczot Tested-by: Miroslaw Drost Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Acked-by Jon Derrick: diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c index b814ca6..7948be3 100644 --- a/arch/x86/pci/vmd.c +++ b/arch/x86/pci/vmd.c @@ -41,6 +41,7 @@ static DEFINE_RAW_SPINLOCK(list_lock); * @node: list item for parent traversal. * @rcu: RCU callback item for freeing. * @irq: back pointer to parent. + * @enabled: true if driver enabled IRQ * @virq: the virtual IRQ value provided to the requesting driver. * * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to @@ -50,6 +51,7 @@ struct vmd_irq { struct list_head node; struct rcu_head rcu; struct vmd_irq_list *irq; + bool enabled; unsigned int virq; }; @@ -122,7 +124,9 @@ static void vmd_irq_enable(struct irq_data *data) unsigned long flags; raw_spin_lock_irqsave(&list_lock, flags); + WARN_ON(vmdirq->enabled); list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list); + vmdirq->enabled = true; raw_spin_unlock_irqrestore(&list_lock, flags); data->chip->irq_unmask(data); @@ -136,8 +140,10 @@ static void vmd_irq_disable(struct irq_data *data) data->chip->irq_mask(data); raw_spin_lock_irqsave(&list_lock, flags); - list_del_rcu(&vmdirq->node); - INIT_LIST_HEAD_RCU(&vmdirq->node); + if (vmdirq->enabled) { + list_del_rcu(&vmdirq->node); + vmdirq->enabled = false; + } raw_spin_unlock_irqrestore(&list_lock, flags); } -- cgit v0.10.2 From e83c6744e81abc93a20d0eb3b7f504a176a6126a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Aug 2016 13:59:33 -0700 Subject: udp: fix poll() issue with zero sized packets Laura tracked poll() [and friends] regression caused by commit e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") udp_poll() needs to know if there is a valid packet in receive queue, even if its payload length is 0. Change first_packet_length() to return an signed int, and use -1 as the indication of an empty queue. Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Reported-by: Laura Abbott Signed-off-by: Eric Dumazet Tested-by: Laura Abbott Signed-off-by: David S. Miller diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e61f7cd..00d18c5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1182,13 +1182,13 @@ out: * @sk: socket * * Drops all bad checksum frames, until a valid one is found. - * Returns the length of found skb, or 0 if none is found. + * Returns the length of found skb, or -1 if none is found. */ -static unsigned int first_packet_length(struct sock *sk) +static int first_packet_length(struct sock *sk) { struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; struct sk_buff *skb; - unsigned int res; + int res; __skb_queue_head_init(&list_kill); @@ -1203,7 +1203,7 @@ static unsigned int first_packet_length(struct sock *sk) __skb_unlink(skb, rcvq); __skb_queue_tail(&list_kill, skb); } - res = skb ? skb->len : 0; + res = skb ? skb->len : -1; spin_unlock_bh(&rcvq->lock); if (!skb_queue_empty(&list_kill)) { @@ -1232,7 +1232,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) case SIOCINQ: { - unsigned int amount = first_packet_length(sk); + int amount = max_t(int, 0, first_packet_length(sk)); return put_user(amount, (int __user *)arg); } @@ -2184,7 +2184,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Check for false positives due to checksum errors */ if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && - !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) + !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) mask &= ~(POLLIN | POLLRDNORM); return mask; -- cgit v0.10.2 From b323431bc017e9862870cbbac004774c769ee112 Mon Sep 17 00:00:00 2001 From: Zefir Kurtisi Date: Mon, 22 Aug 2016 15:56:38 +0200 Subject: gianfar: prevent fragmentation in DSA environments The eTSEC register MRBLR defines the maximum space in the RX buffers and is set to 1536 by gianfar. This reasonably covers the common use case where the MTU is kept at default 1500. In that case, the largest Ethernet frame size of 1518 plus an optional GMAC_FCB_LEN of 8, and an additional padding of 8 to handle FSL_GIANFAR_DEV_HAS_TIMER totals to 1534 and nicely fit within the chosen MRBLR. Alas, if the eTSEC is attached to a DSA enabled switch, the (E)DSA header extension (4 or 8 bytes) causes every maximum sized frame to be fragmented by the hardware. This patch increases the maximum RX buffer size by 8 and rounds up to the next multiple of 64, which the hardware's defines as RX buffer granularity. Signed-off-by: Zefir Kurtisi Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h index 373fd09..6e8a9c8 100644 --- a/drivers/net/ethernet/freescale/gianfar.h +++ b/drivers/net/ethernet/freescale/gianfar.h @@ -100,7 +100,8 @@ extern const char gfar_driver_version[]; #define DEFAULT_RX_LFC_THR 16 #define DEFAULT_LFC_PTVVAL 4 -#define GFAR_RXB_SIZE 1536 +/* prevent fragmenation by HW in DSA environments */ +#define GFAR_RXB_SIZE roundup(1536 + 8, 64) #define GFAR_SKBFRAG_SIZE (RXBUF_ALIGNMENT + GFAR_RXB_SIZE \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define GFAR_RXB_TRUESIZE 2048 -- cgit v0.10.2 From 6c389fc931bcda88940c809f752ada6d7799482c Mon Sep 17 00:00:00 2001 From: Zefir Kurtisi Date: Mon, 22 Aug 2016 15:58:12 +0200 Subject: gianfar: fix size of scatter-gathered frames The current scatter-gather logic in gianfar is flawed, since it does not consider the eTSEC's RxBD 'Data Length' field is context depening: for the last fragment it contains the full frame size, while fragments contain the fragment size, which equals the value written to register MRBLR. This causes data corruption as soon as the hardware starts to fragment receiving frames. As a result, the size of fragmented frames is increased by (nr_frags - 1) * MRBLR We first noticed this issue working with DSA, where an ICMP request sized 1472 bytes causes the scatter-gather logic to kick in. The full Ethernet frame (1518) gets increased by DSA (4), GMAC_FCB_LEN (8), and FSL_GIANFAR_DEV_HAS_TIMER (priv->padding=8) to a total of 1538 octets, which is fragmented by the hardware and reconstructed by the driver to a 3074 octet frame. This patch fixes the problem by adjusting the size of the last fragment. It was tested by setting MRBLR to different multiples of 64, proving correct scatter-gather operation on frames with up to 9000 octets in size. Signed-off-by: Zefir Kurtisi Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index d20935d..4b4f5bc 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2922,17 +2922,25 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus, { unsigned int size = lstatus & BD_LENGTH_MASK; struct page *page = rxb->page; + bool last = !!(lstatus & BD_LFLAG(RXBD_LAST)); /* Remove the FCS from the packet length */ - if (likely(lstatus & BD_LFLAG(RXBD_LAST))) + if (last) size -= ETH_FCS_LEN; - if (likely(first)) + if (likely(first)) { skb_put(skb, size); - else - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rxb->page_offset + RXBUF_ALIGNMENT, - size, GFAR_RXB_TRUESIZE); + } else { + /* the last fragments' length contains the full frame length */ + if (last) + size -= skb->len; + + /* in case the last fragment consisted only of the FCS */ + if (size > 0) + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rxb->page_offset + RXBUF_ALIGNMENT, + size, GFAR_RXB_TRUESIZE); + } /* try reuse page */ if (unlikely(page_count(page) != 1)) -- cgit v0.10.2 From 20a2b49fc538540819a0c552877086548cff8d8d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Aug 2016 11:31:10 -0700 Subject: tcp: properly scale window in tcp_v[46]_reqsk_send_ack() When sending an ack in SYN_RECV state, we must scale the offered window if wscale option was negotiated and accepted. Tested: Following packetdrill test demonstrates the issue : 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 // Establish a connection. +0 < S 0:0(0) win 20000 +0 > S. 0:0(0) ack 1 win 28960 +0 < . 1:11(10) ack 1 win 156 // check that window is properly scaled ! +0 > . 1:1(0) ack 1 win 226 Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 32b048e..7158d4f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -814,8 +814,14 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt; + /* RFC 7323 2.3 + * The window field (SEG.WND) of every outgoing segment, with the + * exception of segments, MUST be right-shifted by + * Rcv.Wind.Shift bits: + */ tcp_v4_send_ack(sock_net(sk), skb, seq, - tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, + tcp_rsk(req)->rcv_nxt, + req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp, req->ts_recent, 0, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 33df8b8..94f4f89 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -944,9 +944,15 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. */ + /* RFC 7323 2.3 + * The window field (SEG.WND) of every outgoing segment, with the + * exception of segments, MUST be right-shifted by + * Rcv.Wind.Shift bits: + */ tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, - tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, + tcp_rsk(req)->rcv_nxt, + req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); -- cgit v0.10.2 From f64f14820e2deb5db056a05d7672ee2b1c6290e5 Mon Sep 17 00:00:00 2001 From: Xander Huff Date: Mon, 22 Aug 2016 15:57:16 -0500 Subject: phy: micrel: Reenable interrupts during resume for ksz9031 Like the ksz8081, the ksz9031 has the behavior where it will clear the interrupt enable bits when leaving power down. This takes advantage of the solution provided by f5aba91. Signed-off-by: Xander Huff Signed-off-by: Nathan Sullivan Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 053e879..885ac9c 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -964,7 +964,7 @@ static struct phy_driver ksphy_driver[] = { .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, .suspend = genphy_suspend, - .resume = genphy_resume, + .resume = kszphy_resume, }, { .phy_id = PHY_ID_KSZ8873MLL, .phy_id_mask = MICREL_PHY_ID_MASK, -- cgit v0.10.2 From 1bc261fabe866c4cdc97f52319eaa0c7ee31026e Mon Sep 17 00:00:00 2001 From: Jamie Lentin Date: Mon, 22 Aug 2016 22:47:08 +0100 Subject: net: mv88e6xxx: Fix ingress rate removal for mv6131 chips The PORT_RATE_CONTROL register works differently on 88e6095/6095f/6131 in comparison to 6123/61/65, and 0x0 disables. The distinction was lost Linux 4.1 --> 4.2 Signed-off-by: Jamie Lentin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d1d9d3c..7106790 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2656,15 +2656,19 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) return ret; } + /* Rate Control: disable ingress rate limiting. */ if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || - mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip) || mv88e6xxx_6320_family(chip)) { - /* Rate Control: disable ingress rate limiting. */ ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_RATE_CONTROL, 0x0001); if (ret) return ret; + } else if (mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip)) { + ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), + PORT_RATE_CONTROL, 0x0000); + if (ret) + return ret; } /* Port Control 1: disable trunking, disable sending -- cgit v0.10.2 From 53080fe9c451e7625e71b91c384e7bef1be72b00 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 23 Aug 2016 09:48:20 -0300 Subject: net: lpc_eth: Check clk_prepare_enable() error clk_prepare_enable() may fail, so we should better check its return value and propagate it in the case of failure While at it, replace __lpc_eth_clock_enable() with a plain clk_prepare_enable/clk_disable_unprepare() call in order to simplify the code. Signed-off-by: Fabio Estevam Acked-by: Vladimir Zapolskiy Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 4d4ecba..8e13ec8 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -475,14 +475,6 @@ static void __lpc_get_mac(struct netdata_local *pldat, u8 *mac) mac[5] = tmp >> 8; } -static void __lpc_eth_clock_enable(struct netdata_local *pldat, bool enable) -{ - if (enable) - clk_prepare_enable(pldat->clk); - else - clk_disable_unprepare(pldat->clk); -} - static void __lpc_params_setup(struct netdata_local *pldat) { u32 tmp; @@ -1056,7 +1048,7 @@ static int lpc_eth_close(struct net_device *ndev) writel(0, LPC_ENET_MAC2(pldat->net_base)); spin_unlock_irqrestore(&pldat->lock, flags); - __lpc_eth_clock_enable(pldat, false); + clk_disable_unprepare(pldat->clk); return 0; } @@ -1197,11 +1189,14 @@ static int lpc_eth_ioctl(struct net_device *ndev, struct ifreq *req, int cmd) static int lpc_eth_open(struct net_device *ndev) { struct netdata_local *pldat = netdev_priv(ndev); + int ret; if (netif_msg_ifup(pldat)) dev_dbg(&pldat->pdev->dev, "enabling %s\n", ndev->name); - __lpc_eth_clock_enable(pldat, true); + ret = clk_prepare_enable(pldat->clk); + if (ret) + return ret; /* Suspended PHY makes LPC ethernet core block, so resume now */ phy_resume(ndev->phydev); @@ -1320,7 +1315,9 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) } /* Enable network clock */ - __lpc_eth_clock_enable(pldat, true); + ret = clk_prepare_enable(pldat->clk); + if (ret) + goto err_out_clk_put; /* Map IO space */ pldat->net_base = ioremap(res->start, resource_size(res)); @@ -1454,6 +1451,7 @@ err_out_iounmap: iounmap(pldat->net_base); err_out_disable_clocks: clk_disable_unprepare(pldat->clk); +err_out_clk_put: clk_put(pldat->clk); err_out_free_dev: free_netdev(ndev); -- cgit v0.10.2 From a8184003c0bb1d6362c2af76c560b3caae6832cb Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 23 Aug 2016 16:31:28 +0200 Subject: dwc_eth_qos: fix interrupt enable race We currently enable interrupts before we enable NAPI. If an RX interrupt hits before we enabled NAPI then the NAPI callback is never called and we leave the hardware with RX interrupts disabled, which of course leads us to never handling received packets. Fix this by moving the interrupt enable to after we've enable NAPI and the reclaim tasklet. Fixes: cd5e41234729 ("dwc_eth_qos: do phy_start before resetting hardware") Signed-off-by: Rabin Vincent Signed-off-by: Lars Persson Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index 9f159a7..5a3941b 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -1622,13 +1622,7 @@ static void dwceqos_init_hw(struct net_local *lp) DWCEQOS_MMC_CTRL_RSTONRD); dwceqos_enable_mmc_interrupt(lp); - /* Enable Interrupts */ - dwceqos_write(lp, REG_DWCEQOS_DMA_CH0_IE, - DWCEQOS_DMA_CH0_IE_NIE | - DWCEQOS_DMA_CH0_IE_RIE | DWCEQOS_DMA_CH0_IE_TIE | - DWCEQOS_DMA_CH0_IE_AIE | - DWCEQOS_DMA_CH0_IE_FBEE); - + dwceqos_write(lp, REG_DWCEQOS_DMA_CH0_IE, 0); dwceqos_write(lp, REG_DWCEQOS_MAC_IE, 0); dwceqos_write(lp, REG_DWCEQOS_MAC_CFG, DWCEQOS_MAC_CFG_IPC | @@ -1905,6 +1899,15 @@ static int dwceqos_open(struct net_device *ndev) netif_start_queue(ndev); tasklet_enable(&lp->tx_bdreclaim_tasklet); + /* Enable Interrupts -- do this only after we enable NAPI and the + * tasklet. + */ + dwceqos_write(lp, REG_DWCEQOS_DMA_CH0_IE, + DWCEQOS_DMA_CH0_IE_NIE | + DWCEQOS_DMA_CH0_IE_RIE | DWCEQOS_DMA_CH0_IE_TIE | + DWCEQOS_DMA_CH0_IE_AIE | + DWCEQOS_DMA_CH0_IE_FBEE); + return 0; } -- cgit v0.10.2 From 232cb53a45965f8789fbf0a9a1962f8c67ab1a3c Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Tue, 23 Aug 2016 11:40:52 -0400 Subject: sctp: fix overrun in sctp_diag_dump_one() The function sctp_diag_dump_one() currently performs a memcpy() of 64 bytes from a 16 byte field into another 16 byte field. Fix by using correct size, use sizeof to obtain correct size instead of using a hard-coded constant. Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Signed-off-by: Lance Richardson Reviewed-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index bb69153..f3508aa 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -424,11 +424,13 @@ static int sctp_diag_dump_one(struct sk_buff *in_skb, paddr.v4.sin_family = AF_INET; } else { laddr.v6.sin6_port = req->id.idiag_sport; - memcpy(&laddr.v6.sin6_addr, req->id.idiag_src, 64); + memcpy(&laddr.v6.sin6_addr, req->id.idiag_src, + sizeof(laddr.v6.sin6_addr)); laddr.v6.sin6_family = AF_INET6; paddr.v6.sin6_port = req->id.idiag_dport; - memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst, 64); + memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst, + sizeof(paddr.v6.sin6_addr)); paddr.v6.sin6_family = AF_INET6; } -- cgit v0.10.2 From 75d855a5e93e6f3d9b37a8719d69a5318f051453 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Aug 2016 09:57:51 -0700 Subject: udp: get rid of SLAB_DESTROY_BY_RCU allocations After commit ca065d0cf80f ("udp: no longer use SLAB_DESTROY_BY_RCU") we do not need this special allocation mode anymore, even if it is harmless. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 00d18c5..5fdcb8d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2216,7 +2216,6 @@ struct proto udp_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 3b3efbd..2eea073 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -55,7 +55,6 @@ struct proto udplite_prot = { .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, .obj_size = sizeof(struct udp_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 81e2f98..19ac3a1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1460,7 +1460,6 @@ struct proto udpv6_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp6_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 9cf097e..fd6ef41 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -50,7 +50,6 @@ struct proto udplitev6_prot = { .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, .obj_size = sizeof(struct udp6_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, -- cgit v0.10.2 From 7b996243fab46092fb3a29c773c54be8152366e4 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Tue, 23 Aug 2016 18:22:33 -0400 Subject: tun: fix transmit timestamp support Instead of using sock_tx_timestamp, use skb_tx_timestamp to record software transmit timestamp of a packet. sock_tx_timestamp resets and overrides the tx_flags of the skb. The function is intended to be called from within the protocol layer when creating the skb, not from a device driver. This is inconsistent with other drivers and will cause issues for TCP. In TCP, we intend to sample the timestamps for the last byte for each sendmsg/sendpage. For that reason, tcp_sendmsg calls tcp_tx_timestamp only with the last skb that it generates. For example, if a 128KB message is split into two 64KB packets we want to sample the SND timestamp of the last packet. The current code in the tun driver, however, will result in sampling the SND timestamp for both packets. Also, when the last packet is split into smaller packets for retranmission (see tcp_fragment), the tun driver will record timestamps for all of the retransmitted packets and not only the last packet. Fixes: eda297729171 (tun: Support software transmit time stamping.) Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Francis Yan Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9c8b5bc..6f9df37 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -894,11 +894,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) goto drop; - if (skb->sk && sk_fullsock(skb->sk)) { - sock_tx_timestamp(skb->sk, skb->sk->sk_tsflags, - &skb_shinfo(skb)->tx_flags); - sw_tx_timestamp(skb); - } + skb_tx_timestamp(skb); /* Orphan the skb - required as we might hang on to it * for indefinite time. -- cgit v0.10.2 From d7226c7a4dd19929d6df4ae04698da2fcf6f875a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 23 Aug 2016 21:05:27 -0700 Subject: net: diag: Fix refcnt leak in error path destroying socket inet_diag_find_one_icsk takes a reference to a socket that is not released if sock_diag_destroy returns an error. Fix by changing tcp_diag_destroy to manage the refcnt for all cases and remove the sock_put calls from tcp_abort. Fixes: c1e64e298b8ca ("net: diag: Support destroying TCP sockets") Reported-by: Lorenzo Colitti Signed-off-by: David Ahern Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 032a96d..ffbb218 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3193,7 +3193,6 @@ int tcp_abort(struct sock *sk, int err) local_bh_enable(); return 0; } - sock_gen_put(sk); return -EOPNOTSUPP; } @@ -3222,7 +3221,6 @@ int tcp_abort(struct sock *sk, int err) bh_unlock_sock(sk); local_bh_enable(); release_sock(sk); - sock_put(sk); return 0; } EXPORT_SYMBOL_GPL(tcp_abort); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 4d61093..a748c74 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -54,11 +54,16 @@ static int tcp_diag_destroy(struct sk_buff *in_skb, { struct net *net = sock_net(in_skb->sk); struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req); + int err; if (IS_ERR(sk)) return PTR_ERR(sk); - return sock_diag_destroy(sk, ECONNABORTED); + err = sock_diag_destroy(sk, ECONNABORTED); + + sock_gen_put(sk); + + return err; } #endif -- cgit v0.10.2 From 27727df240c7cc84f2ba6047c6f18d5addfd25ef Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 23 Aug 2016 16:08:21 -0700 Subject: timekeeping: Avoid taking lock in NMI path with CONFIG_DEBUG_TIMEKEEPING When I added some extra sanity checking in timekeeping_get_ns() under CONFIG_DEBUG_TIMEKEEPING, I missed that the NMI safe __ktime_get_fast_ns() method was using timekeeping_get_ns(). Thus the locking added to the debug checks broke the NMI-safety of __ktime_get_fast_ns(). This patch open-codes the timekeeping_get_ns() logic for __ktime_get_fast_ns(), so can avoid any deadlocks in NMI. Fixes: 4ca22c2648f9 "timekeeping: Add warnings when overflows or underflows are observed" Reported-by: Steven Rostedt Reported-by: Peter Zijlstra Signed-off-by: John Stultz Cc: stable Link: http://lkml.kernel.org/r/1471993702-29148-2-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3b65746..e07fb09 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -401,7 +401,10 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) do { seq = raw_read_seqcount_latch(&tkf->seq); tkr = tkf->base + (seq & 0x01); - now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr); + now = ktime_to_ns(tkr->base); + + now += clocksource_delta(tkr->read(tkr->clock), + tkr->cycle_last, tkr->mask); } while (read_seqcount_retry(&tkf->seq, seq)); return now; -- cgit v0.10.2 From a4f8f6667f099036c88f231dcad4cf233652c824 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 23 Aug 2016 16:08:22 -0700 Subject: timekeeping: Cap array access in timekeeping_debug It was reported that hibernation could fail on the 2nd attempt, where the system hangs at hibernate() -> syscore_resume() -> i8237A_resume() -> claim_dma_lock(), because the lock has already been taken. However there is actually no other process would like to grab this lock on that problematic platform. Further investigation showed that the problem is triggered by setting /sys/power/pm_trace to 1 before the 1st hibernation. Since once pm_trace is enabled, the rtc becomes unmeaningful after suspend, and meanwhile some BIOSes would like to adjust the 'invalid' RTC (e.g, smaller than 1970) to the release date of that motherboard during POST stage, thus after resumed, it may seem that the system had a significant long sleep time which is a completely meaningless value. Then in timekeeping_resume -> tk_debug_account_sleep_time, if the bit31 of the sleep time happened to be set to 1, fls() returns 32 and we add 1 to sleep_time_bin[32], which causes an out of bounds array access and therefor memory being overwritten. As depicted by System.map: 0xffffffff81c9d080 b sleep_time_bin 0xffffffff81c9d100 B dma_spin_lock the dma_spin_lock.val is set to 1, which caused this problem. This patch adds a sanity check in tk_debug_account_sleep_time() to ensure we don't index past the sleep_time_bin array. [jstultz: Problem diagnosed and original patch by Chen Yu, I've solved the issue slightly differently, but borrowed his excelent explanation of the issue here.] Fixes: 5c83545f24ab "power: Add option to log time spent in suspend" Reported-by: Janek Kozicki Reported-by: Chen Yu Signed-off-by: John Stultz Cc: linux-pm@vger.kernel.org Cc: Peter Zijlstra Cc: Xunlei Pang Cc: "Rafael J. Wysocki" Cc: stable Cc: Zhang Rui Link: http://lkml.kernel.org/r/1471993702-29148-3-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c index f6bd652..107310a 100644 --- a/kernel/time/timekeeping_debug.c +++ b/kernel/time/timekeeping_debug.c @@ -23,7 +23,9 @@ #include "timekeeping_internal.h" -static unsigned int sleep_time_bin[32] = {0}; +#define NUM_BINS 32 + +static unsigned int sleep_time_bin[NUM_BINS] = {0}; static int tk_debug_show_sleep_time(struct seq_file *s, void *data) { @@ -69,6 +71,9 @@ late_initcall(tk_debug_sleep_time_init); void tk_debug_account_sleep_time(struct timespec64 *t) { - sleep_time_bin[fls(t->tv_sec)]++; + /* Cap bin index so we don't overflow the array */ + int bin = min(fls(t->tv_sec), NUM_BINS-1); + + sleep_time_bin[bin]++; } -- cgit v0.10.2 From 2e63ad4bd5dd583871e6602f9d398b9322d358d9 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 23 Aug 2016 20:07:19 +0800 Subject: x86/apic: Do not init irq remapping if ioapic is disabled native_smp_prepare_cpus -> default_setup_apic_routing -> enable_IR_x2apic -> irq_remapping_prepare -> intel_prepare_irq_remapping -> intel_setup_irq_remapping So IR table is setup even if "noapic" boot parameter is added. As a result we crash later when the interrupt affinity is set due to a half initialized remapping infrastructure. Prevent remap initialization when IOAPIC is disabled. Signed-off-by: Wanpeng Li Cc: Peter Zijlstra Cc: Joerg Roedel Link: http://lkml.kernel.org/r/1471954039-3942-1-git-send-email-wanpeng.li@hotmail.com Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index cea4fc1..50c95af 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1623,6 +1623,9 @@ void __init enable_IR_x2apic(void) unsigned long flags; int ret, ir_stat; + if (skip_ioapic_setup) + return; + ir_stat = irq_remapping_prepare(); if (ir_stat < 0 && !x2apic_supported()) return; -- cgit v0.10.2 From abaa2274811d607679e8687b4118c4922a3517ac Mon Sep 17 00:00:00 2001 From: Anisse Astier Date: Wed, 24 Aug 2016 09:14:13 +0200 Subject: ALSA: hda/realtek - fix headset mic detection for MSI MS-B120 MSI Cubi MS-B120 needs the same fixup as the Gigabyte BXBT-2807 for its mic to work. They both use a single 3-way jack for both mic and headset with an ALC283 codec, with the same pins used. Cc: Daniel Drake Signed-off-by: Anisse Astier Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 574b1b4..7100f05 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4828,7 +4828,7 @@ enum { ALC293_FIXUP_DELL1_MIC_NO_PRESENCE, ALC292_FIXUP_TPT440_DOCK, ALC292_FIXUP_TPT440, - ALC283_FIXUP_BXBT2807_MIC, + ALC283_FIXUP_HEADSET_MIC, ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED, ALC282_FIXUP_ASPIRE_V5_PINS, ALC280_FIXUP_HP_GPIO4, @@ -5321,7 +5321,7 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC292_FIXUP_TPT440_DOCK, }, - [ALC283_FIXUP_BXBT2807_MIC] = { + [ALC283_FIXUP_HEADSET_MIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { { 0x19, 0x04a110f0 }, @@ -5651,7 +5651,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), - SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_BXBT2807_MIC), + SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE), -- cgit v0.10.2 From 588deb614a0d3caa596dd8eba8c4d31eaaeb89b9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 9 Aug 2016 11:03:56 +0100 Subject: MAINTAINERS: Add ARM ARCHITECTED TIMER entry The ARM architected timer driver falls under the drivers/clocksource/ catch-all in MAINTAINERS, and get_maintainers.pl doesn't suggest a number of people who should be Cc'd. The ARM architected timer is a core component of ARMv7+VE and ARMv8, and is critical to the correct operation of both architecture ports (and their respective KVM code), and patches to it should have review by knowledgeable interested parties. This patch adds a MAINTAINERS entry for the driver and its low-level arch components, such that get_maintainer.pl will always include relevant interested parties for modifications to the driver. For the timebeing, this means myself and Marc Zyngier. Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Cc: Catalin Marinas Cc: Daniel Lezcano Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1470737036-2082-1-git-send-email-mark.rutland@arm.com Signed-off-by: Thomas Gleixner diff --git a/MAINTAINERS b/MAINTAINERS index 0bbe4b1..4705c94 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -881,6 +881,15 @@ S: Supported F: drivers/gpu/drm/arc/ F: Documentation/devicetree/bindings/display/snps,arcpgu.txt +ARM ARCHITECTED TIMER DRIVER +M: Mark Rutland +M: Marc Zyngier +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: arch/arm/include/asm/arch_timer.h +F: arch/arm64/include/asm/arch_timer.h +F: drivers/clocksource/arm_arch_timer.c + ARM HDLCD DRM DRIVER M: Liviu Dudau S: Supported -- cgit v0.10.2 From aa8c0f1ad7e862147f4efb32bbb71ff66eb38caa Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Tue, 23 Aug 2016 23:19:29 +0800 Subject: clocksource/drivers/pxa: Fix include files for compilation We get 1 warning about global functions without a declaration in the clocksource/drivers/pxa driver when building with W=1: drivers/clocksource/pxa_timer.c:221:13: warning: no previous prototype for 'pxa_timer_nodt_init' [-Wmissing-prototypes] void __init pxa_timer_nodt_init(int irq, void __iomem *base, In fact, this function is declared in pxa.h, so this patch add missing header dependencies. Signed-off-by: Baoyou Xie Reviewed-by: Arnd Bergmann Cc: daniel.lezcano@linaro.org Cc: xie.baoyou@zte.com.cn Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1471965569-4104-1-git-send-email-baoyou.xie@linaro.org Signed-off-by: Thomas Gleixner diff --git a/drivers/clocksource/pxa_timer.c b/drivers/clocksource/pxa_timer.c index 937e10b..3e1cb51 100644 --- a/drivers/clocksource/pxa_timer.c +++ b/drivers/clocksource/pxa_timer.c @@ -21,6 +21,8 @@ #include #include +#include + #include #define OSMR0 0x00 /* OS Timer 0 Match Register */ -- cgit v0.10.2 From 40d9c32525cba79130612650b1abc47c0c0f19a8 Mon Sep 17 00:00:00 2001 From: Aleksandr Makarov Date: Wed, 24 Aug 2016 13:06:22 +0300 Subject: USB: serial: option: add WeTelecom 0x6802 and 0x6803 products These product IDs are listed in Windows driver. 0x6803 corresponds to WeTelecom WM-D300. 0x6802 name is unknown. Signed-off-by: Aleksandr Makarov Cc: stable Signed-off-by: Johan Hovold diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index bb6a711..9894e34 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -528,6 +528,8 @@ static void option_instat_callback(struct urb *urb); /* WeTelecom products */ #define WETELECOM_VENDOR_ID 0x22de #define WETELECOM_PRODUCT_WMD200 0x6801 +#define WETELECOM_PRODUCT_6802 0x6802 +#define WETELECOM_PRODUCT_WMD300 0x6803 struct option_blacklist_info { /* bitmask of interface numbers blacklisted for send_setup */ @@ -1996,6 +1998,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) }, { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_6802, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD300, 0xff, 0xff, 0xff) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); -- cgit v0.10.2 From f74bdd4cb5d0d4c3e89919e850e0bbb8789f32f9 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Tue, 16 Aug 2016 21:49:45 +0200 Subject: hwrng: mxc-rnga - Fix Kconfig dependency We can directly depend on SOC_IMX31 since commit c9ee94965dce ("ARM: imx: deconstruct mxc_rnga initialization") Since that commit, CONFIG_HW_RANDOM_MXC_RNGA could not be switched on with unknown symbol ARCH_HAS_RNGA and mxc-rnga.o can't be generated with ARCH=arm make M=drivers/char/hw_random Previously, HW_RANDOM_MXC_RNGA required ARCH_HAS_RNGA which was based on IMX_HAVE_PLATFORM_MXC_RNGA && ARCH_MXC. IMX_HAVE_PLATFORM_MXC_RNGA was based on SOC_IMX31. Fixes: c9ee94965dce ("ARM: imx: deconstruct mxc_rnga initialization") Signed-off-by: Fabian Frederick Acked-by: Arnd Bergmann Signed-off-by: Herbert Xu diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 56ad5a5..8c0770b 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -244,7 +244,7 @@ config HW_RANDOM_TX4939 config HW_RANDOM_MXC_RNGA tristate "Freescale i.MX RNGA Random Number Generator" - depends on ARCH_HAS_RNGA + depends on SOC_IMX31 default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number -- cgit v0.10.2 From 10bb087ce381c812cd81a65ffd5e6f83e6399291 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 18 Aug 2016 19:53:36 +0100 Subject: crypto: qat - fix aes-xts key sizes Increase value of supported key sizes for qat_aes_xts. aes-xts keys consists of keys of equal size concatenated. Fixes: def14bfaf30d ("crypto: qat - add support for ctr(aes) and xts(aes)") Cc: stable@vger.kernel.org Reported-by: Wenqian Yu Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 769148d..20f35df 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -1260,8 +1260,8 @@ static struct crypto_alg qat_algs[] = { { .setkey = qat_alg_ablkcipher_xts_setkey, .decrypt = qat_alg_ablkcipher_decrypt, .encrypt = qat_alg_ablkcipher_encrypt, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, }, }, -- cgit v0.10.2 From 901d3d4fee83e9407d91e7178048e2fed6c91f6b Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Wed, 24 Aug 2016 15:34:40 +0800 Subject: crypto: vmx - fix null dereference in p8_aes_xts_crypt walk.iv is not assigned a value in blkcipher_walk_init. It makes iv uninitialized. It is possibly a null value(as shown below), which is then used by aes_p8_encrypt. This patch moves iv = walk.iv after blkcipher_walk_virt, in which walk.iv is set. [17856.268050] Unable to handle kernel paging request for data at address 0x00000000 [17856.268212] Faulting instruction address: 0xd000000002ff04bc 7:mon> t [link register ] d000000002ff47b8 p8_aes_xts_crypt+0x168/0x2a0 [vmx_crypto] (938) [c000000013b77960] d000000002ff4794 p8_aes_xts_crypt+0x144/0x2a0 [vmx_crypto] (unreliable) [c000000013b77a70] c000000000544d64 skcipher_decrypt_blkcipher+0x64/0x80 [c000000013b77ac0] d000000003c0175c crypt_convert+0x53c/0x620 [dm_crypt] [c000000013b77ba0] d000000003c043fc kcryptd_crypt+0x3cc/0x440 [dm_crypt] [c000000013b77c50] c0000000000f3070 process_one_work+0x1e0/0x590 [c000000013b77ce0] c0000000000f34c8 worker_thread+0xa8/0x660 [c000000013b77d80] c0000000000fc0b0 kthread+0x110/0x130 [c000000013b77e30] c0000000000098f0 ret_from_kernel_thread+0x5c/0x6c Signed-off-by: Li Zhong Signed-off-by: Herbert Xu diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c index cfb2541..24353ec3 100644 --- a/drivers/crypto/vmx/aes_xts.c +++ b/drivers/crypto/vmx/aes_xts.c @@ -129,8 +129,8 @@ static int p8_aes_xts_crypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); - iv = (u8 *)walk.iv; ret = blkcipher_walk_virt(desc, &walk); + iv = walk.iv; memset(tweak, 0, AES_BLOCK_SIZE); aes_p8_encrypt(iv, tweak, &ctx->tweak_key); -- cgit v0.10.2 From 8b6a3fe8fab97716990a3abde1a01fb5a34552a3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 24 Aug 2016 10:07:14 +0100 Subject: perf/core: Use this_cpu_ptr() when stopping AUX events When tearing down an AUX buf for an event via perf_mmap_close(), __perf_event_output_stop() is called on the event's CPU to ensure that trace generation is halted before the process of unmapping and freeing the buffer pages begins. The callback is performed via cpu_function_call(), which ensures that it runs with interrupts disabled and is therefore not preemptible. Unfortunately, the current code grabs the per-cpu context pointer using get_cpu_ptr(), which unnecessarily disables preemption and doesn't pair the call with put_cpu_ptr(), leading to a preempt_count() imbalance and a BUG when freeing the AUX buffer later on: WARNING: CPU: 1 PID: 2249 at kernel/events/ring_buffer.c:539 __rb_free_aux+0x10c/0x120 Modules linked in: [...] Call Trace: [] dump_stack+0x4f/0x72 [] __warn+0xc6/0xe0 [] warn_slowpath_null+0x18/0x20 [] __rb_free_aux+0x10c/0x120 [] rb_free_aux+0x13/0x20 [] perf_mmap_close+0x29e/0x2f0 [] ? perf_iterate_ctx+0xe0/0xe0 [] remove_vma+0x25/0x60 [] exit_mmap+0x106/0x140 [] mmput+0x1c/0xd0 [] do_exit+0x253/0xbf0 [] do_group_exit+0x3e/0xb0 [] get_signal+0x249/0x640 [] do_signal+0x23/0x640 [] ? _raw_write_unlock_irq+0x12/0x30 [] ? _raw_spin_unlock_irq+0x9/0x10 [] ? __schedule+0x2c6/0x710 [] exit_to_usermode_loop+0x74/0x90 [] prepare_exit_to_usermode+0x26/0x30 [] retint_user+0x8/0x10 This patch uses this_cpu_ptr() instead of get_cpu_ptr(), since preemption is already disabled by the caller. Signed-off-by: Will Deacon Reviewed-by: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 95ff4ca26c49 ("perf/core: Free AUX pages in unmap path") Link: http://lkml.kernel.org/r/20160824091905.GA16944@arm.com Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 5650f53..3cfabdf 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6166,7 +6166,7 @@ static int __perf_pmu_output_stop(void *info) { struct perf_event *event = info; struct pmu *pmu = event->pmu; - struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); struct remote_output ro = { .rb = event->rb, }; -- cgit v0.10.2 From 87904c3e82319cf2bad8d656d79c5030dab9490e Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 12 Aug 2016 16:00:53 +0200 Subject: drm/tegra: dsi: Enhance runtime power management The MIPI DSI output on Tegra SoCs requires some external logic to calibrate the MIPI pads before a video signal can be transmitted. This MIPI calibration logic requires to be powered on while the MIPI pads are being used, which is currently done as part of the DSI driver's probe implementation. This is suboptimal because it will leave the MIPI calibration logic powered up even if the DSI output is never used. On Tegra114 and earlier this behaviour also causes the driver to hang while trying to power up the MIPI calibration logic because the power partition that contains the MIPI calibration logic will be powered on by the display controller at output pipeline configuration time. Thus the power up sequence for the MIPI calibration logic happens before it's power partition is guaranteed to be enabled. Fix this by splitting up the API into a request/free pair of functions that manage the runtime dependency between the DSI and the calibration modules (no registers are accessed) and a set of enable, calibrate and disable functions that program the MIPI calibration logic at points in time where the power partition is really enabled. While at it, make sure that the runtime power management also works in ganged mode, which is currently also broken. Reported-by: Jonathan Hunter Tested-by: Jonathan Hunter Signed-off-by: Thierry Reding diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index 3d228ad..3dea121 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -840,6 +840,21 @@ static const struct drm_encoder_funcs tegra_dsi_encoder_funcs = { .destroy = tegra_output_encoder_destroy, }; +static void tegra_dsi_unprepare(struct tegra_dsi *dsi) +{ + int err; + + if (dsi->slave) + tegra_dsi_unprepare(dsi->slave); + + err = tegra_mipi_disable(dsi->mipi); + if (err < 0) + dev_err(dsi->dev, "failed to disable MIPI calibration: %d\n", + err); + + pm_runtime_put(dsi->dev); +} + static void tegra_dsi_encoder_disable(struct drm_encoder *encoder) { struct tegra_output *output = encoder_to_output(encoder); @@ -876,7 +891,26 @@ static void tegra_dsi_encoder_disable(struct drm_encoder *encoder) tegra_dsi_disable(dsi); - pm_runtime_put(dsi->dev); + tegra_dsi_unprepare(dsi); +} + +static void tegra_dsi_prepare(struct tegra_dsi *dsi) +{ + int err; + + pm_runtime_get_sync(dsi->dev); + + err = tegra_mipi_enable(dsi->mipi); + if (err < 0) + dev_err(dsi->dev, "failed to enable MIPI calibration: %d\n", + err); + + err = tegra_dsi_pad_calibrate(dsi); + if (err < 0) + dev_err(dsi->dev, "MIPI calibration failed: %d\n", err); + + if (dsi->slave) + tegra_dsi_prepare(dsi->slave); } static void tegra_dsi_encoder_enable(struct drm_encoder *encoder) @@ -887,13 +921,8 @@ static void tegra_dsi_encoder_enable(struct drm_encoder *encoder) struct tegra_dsi *dsi = to_dsi(output); struct tegra_dsi_state *state; u32 value; - int err; - - pm_runtime_get_sync(dsi->dev); - err = tegra_dsi_pad_calibrate(dsi); - if (err < 0) - dev_err(dsi->dev, "MIPI calibration failed: %d\n", err); + tegra_dsi_prepare(dsi); state = tegra_dsi_get_state(dsi); diff --git a/drivers/gpu/host1x/mipi.c b/drivers/gpu/host1x/mipi.c index 52a6fd2..e00809d 100644 --- a/drivers/gpu/host1x/mipi.c +++ b/drivers/gpu/host1x/mipi.c @@ -242,20 +242,6 @@ struct tegra_mipi_device *tegra_mipi_request(struct device *device) dev->pads = args.args[0]; dev->device = device; - mutex_lock(&dev->mipi->lock); - - if (dev->mipi->usage_count++ == 0) { - err = tegra_mipi_power_up(dev->mipi); - if (err < 0) { - dev_err(dev->mipi->dev, - "failed to power up MIPI bricks: %d\n", - err); - return ERR_PTR(err); - } - } - - mutex_unlock(&dev->mipi->lock); - return dev; put: @@ -270,29 +256,42 @@ EXPORT_SYMBOL(tegra_mipi_request); void tegra_mipi_free(struct tegra_mipi_device *device) { - int err; + platform_device_put(device->pdev); + kfree(device); +} +EXPORT_SYMBOL(tegra_mipi_free); - mutex_lock(&device->mipi->lock); +int tegra_mipi_enable(struct tegra_mipi_device *dev) +{ + int err = 0; - if (--device->mipi->usage_count == 0) { - err = tegra_mipi_power_down(device->mipi); - if (err < 0) { - /* - * Not much that can be done here, so an error message - * will have to do. - */ - dev_err(device->mipi->dev, - "failed to power down MIPI bricks: %d\n", - err); - } - } + mutex_lock(&dev->mipi->lock); - mutex_unlock(&device->mipi->lock); + if (dev->mipi->usage_count++ == 0) + err = tegra_mipi_power_up(dev->mipi); + + mutex_unlock(&dev->mipi->lock); + + return err; - platform_device_put(device->pdev); - kfree(device); } -EXPORT_SYMBOL(tegra_mipi_free); +EXPORT_SYMBOL(tegra_mipi_enable); + +int tegra_mipi_disable(struct tegra_mipi_device *dev) +{ + int err = 0; + + mutex_lock(&dev->mipi->lock); + + if (--dev->mipi->usage_count == 0) + err = tegra_mipi_power_down(dev->mipi); + + mutex_unlock(&dev->mipi->lock); + + return err; + +} +EXPORT_SYMBOL(tegra_mipi_disable); static int tegra_mipi_wait(struct tegra_mipi *mipi) { diff --git a/include/linux/host1x.h b/include/linux/host1x.h index d2ba7d3..1ffbf2a 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -304,6 +304,8 @@ struct tegra_mipi_device; struct tegra_mipi_device *tegra_mipi_request(struct device *device); void tegra_mipi_free(struct tegra_mipi_device *device); +int tegra_mipi_enable(struct tegra_mipi_device *device); +int tegra_mipi_disable(struct tegra_mipi_device *device); int tegra_mipi_calibrate(struct tegra_mipi_device *device); #endif -- cgit v0.10.2 From 9b47f77a680447e0132b2cf7fb82374e014bec1c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 24 Aug 2016 03:52:12 -0700 Subject: nvme: Fix nvme_get/set_features() with a NULL result pointer nvme_set_features() callers seem to expect that passing NULL as the result pointer is acceptable. Teach nvme_set_features() not to try to write to the NULL address. For symmetry, make the same change to nvme_get_features(), despite the fact that all current callers pass a valid result pointer. I assume that this bug hasn't been reported in practice because the callers that pass NULL are all in the SCSI translation layer and no one uses the relevant operations. Cc: stable@vger.kernel.org Signed-off-by: Andy Lutomirski Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7f75d66..2feacc7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -611,7 +611,7 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0, NVME_QID_ANY, 0, 0); - if (ret >= 0) + if (ret >= 0 && result) *result = le32_to_cpu(cqe.result); return ret; } @@ -631,7 +631,7 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0, NVME_QID_ANY, 0, 0); - if (ret >= 0) + if (ret >= 0 && result) *result = le32_to_cpu(cqe.result); return ret; } -- cgit v0.10.2 From 4d70dca4eadf2f95abe389116ac02b8439c2d16c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 23 Aug 2016 21:49:45 +0800 Subject: block: make sure a big bio is split into at most 256 bvecs After arbitrary bio size was introduced, the incoming bio may be very big. We have to split the bio into small bios so that each holds at most BIO_MAX_PAGES bvecs for safety reason, such as bio_clone(). This patch fixes the following kernel crash: > [ 172.660142] BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 > [ 172.660229] IP: [] bio_trim+0xf/0x2a > [ 172.660289] PGD 7faf3e067 PUD 7f9279067 PMD 0 > [ 172.660399] Oops: 0000 [#1] SMP > [...] > [ 172.664780] Call Trace: > [ 172.664813] [] ? raid1_make_request+0x2e8/0xad7 [raid1] > [ 172.664846] [] ? blk_queue_split+0x377/0x3d4 > [ 172.664880] [] ? md_make_request+0xf6/0x1e9 [md_mod] > [ 172.664912] [] ? generic_make_request+0xb5/0x155 > [ 172.664947] [] ? prio_io+0x85/0x95 [bcache] > [ 172.664981] [] ? register_cache_set+0x355/0x8d0 [bcache] > [ 172.665016] [] ? register_bcache+0x1006/0x1174 [bcache] The issue can be reproduced by the following steps: - create one raid1 over two virtio-blk - build bcache device over the above raid1 and another cache device and bucket size is set as 2Mbytes - set cache mode as writeback - run random write over ext4 on the bcache device Fixes: 54efd50(block: make generic_make_request handle arbitrarily sized bios) Reported-by: Sebastian Roesner Reported-by: Eric Wheeler Cc: stable@vger.kernel.org (4.3+) Cc: Shaohua Li Acked-by: Kent Overstreet Signed-off-by: Ming Lei Signed-off-by: Jens Axboe diff --git a/block/blk-merge.c b/block/blk-merge.c index 72627e3..2642e5f 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -94,9 +94,31 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, bool do_split = true; struct bio *new = NULL; const unsigned max_sectors = get_max_io_size(q, bio); + unsigned bvecs = 0; bio_for_each_segment(bv, bio, iter) { /* + * With arbitrary bio size, the incoming bio may be very + * big. We have to split the bio into small bios so that + * each holds at most BIO_MAX_PAGES bvecs because + * bio_clone() can fail to allocate big bvecs. + * + * It should have been better to apply the limit per + * request queue in which bio_clone() is involved, + * instead of globally. The biggest blocker is the + * bio_clone() in bio bounce. + * + * If bio is splitted by this reason, we should have + * allowed to continue bios merging, but don't do + * that now for making the change simple. + * + * TODO: deal with bio bounce's bio_clone() gracefully + * and convert the global limit into per-queue limit. + */ + if (bvecs++ >= BIO_MAX_PAGES) + goto split; + + /* * If the queue doesn't support SG gaps and adding this * offset would create a gap, disallow it. */ -- cgit v0.10.2 From 5661538749511d4c2f7d33e1e179f10c545b24d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 17 Aug 2016 13:44:20 +0200 Subject: drm/amdgpu: fix lru size grouping v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding a BO can make it the insertion point for larger sizes as well. v2: add a comment about the guard structure. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8c704c8..700c56b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -426,6 +426,8 @@ struct amdgpu_mman { /* custom LRU management */ struct amdgpu_mman_lru log2_size[AMDGPU_TTM_LRU_SIZE]; + /* guard for log2_size array, don't add anything in between */ + struct amdgpu_mman_lru guard; }; int amdgpu_copy_buffer(struct amdgpu_ring *ring, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 141bfca..716f2af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -950,6 +950,8 @@ static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo) struct list_head *res = lru->lru[tbo->mem.mem_type]; lru->lru[tbo->mem.mem_type] = &tbo->lru; + while ((++lru)->lru[tbo->mem.mem_type] == res) + lru->lru[tbo->mem.mem_type] = &tbo->lru; return res; } @@ -960,6 +962,8 @@ static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo) struct list_head *res = lru->swap_lru; lru->swap_lru = &tbo->swap; + while ((++lru)->swap_lru == res) + lru->swap_lru = &tbo->swap; return res; } @@ -1011,6 +1015,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) lru->swap_lru = &adev->mman.bdev.glob->swap_lru; } + for (j = 0; j < TTM_NUM_MEM_TYPES; ++j) + adev->mman.guard.lru[j] = NULL; + adev->mman.guard.swap_lru = NULL; + adev->mman.initialized = true; r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, adev->mc.real_vram_size >> PAGE_SHIFT); -- cgit v0.10.2 From 9afee94939e3eda4c8bf239f7727cb56e158c976 Mon Sep 17 00:00:00 2001 From: Frederic Dalleau Date: Tue, 23 Aug 2016 07:59:19 +0200 Subject: Bluetooth: Fix memory leak at end of hci requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In hci_req_sync_complete the event skb is referenced in hdev->req_skb. It is used (via hci_req_run_skb) from either __hci_cmd_sync_ev which will pass the skb to the caller, or __hci_req_sync which leaks. unreferenced object 0xffff880005339a00 (size 256): comm "kworker/u3:1", pid 1011, jiffies 4294671976 (age 107.389s) backtrace: [] kmemleak_alloc+0x49/0xa0 [] kmem_cache_alloc+0x128/0x180 [] skb_clone+0x4f/0xa0 [] hci_event_packet+0xc1/0x3290 [] hci_rx_work+0x18b/0x360 [] process_one_work+0x14a/0x440 [] worker_thread+0x43/0x4d0 [] kthread+0xc4/0xe0 [] ret_from_fork+0x1f/0x40 [] 0xffffffffffffffff Signed-off-by: Frédéric Dalleau Signed-off-by: Marcel Holtmann diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index c045b3c..b0e23df 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -262,6 +262,8 @@ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, break; } + kfree_skb(hdev->req_skb); + hdev->req_skb = NULL; hdev->req_status = hdev->req_result = 0; BT_DBG("%s end: err %d", hdev->name, err); -- cgit v0.10.2 From dbb50887c8f619fc5c3489783ebc3122bc134a31 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 27 Jul 2016 11:40:14 -0700 Subject: Bluetooth: split sk_filter in l2cap_sock_recv_cb During an audit for sk_filter(), we found that rx_busy_skb handling in l2cap_sock_recv_cb() and l2cap_sock_recvmsg() looks not quite as intended. The assumption from commit e328140fdacb ("Bluetooth: Use event-driven approach for handling ERTM receive buffer") is that errors returned from sock_queue_rcv_skb() are due to receive buffer shortage. However, nothing should prevent doing a setsockopt() with SO_ATTACH_FILTER on the socket, that could drop some of the incoming skbs when handled in sock_queue_rcv_skb(). In that case sock_queue_rcv_skb() will return with -EPERM, propagated from sk_filter() and if in L2CAP_MODE_ERTM mode, wrong assumption was that we failed due to receive buffer being full. From that point onwards, due to the to-be-dropped skb being held in rx_busy_skb, we cannot make any forward progress as rx_busy_skb is never cleared from l2cap_sock_recvmsg(), due to the filter drop verdict over and over coming from sk_filter(). Meanwhile, in l2cap_sock_recv_cb() all new incoming skbs are being dropped due to rx_busy_skb being occupied. Instead, just use __sock_queue_rcv_skb() where an error really tells that there's a receive buffer issue. Split the sk_filter() and enable it for non-segmented modes at queuing time since at this point in time the skb has already been through the ERTM state machine and it has been acked, so dropping is not allowed. Instead, for ERTM and streaming mode, call sk_filter() in l2cap_data_rcv() so the packet can be dropped before the state machine sees it. Fixes: e328140fdacb ("Bluetooth: Use event-driven approach for handling ERTM receive buffer") Signed-off-by: Daniel Borkmann Signed-off-by: Mat Martineau Acked-by: Willem de Bruijn Signed-off-by: Marcel Holtmann diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 54ceb1f..d4cad29b0 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -5835,6 +5836,9 @@ static int l2cap_reassemble_sdu(struct l2cap_chan *chan, struct sk_buff *skb, if (chan->sdu) break; + if (!pskb_may_pull(skb, L2CAP_SDULEN_SIZE)) + break; + chan->sdu_len = get_unaligned_le16(skb->data); skb_pull(skb, L2CAP_SDULEN_SIZE); @@ -6610,6 +6614,10 @@ static int l2cap_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) goto drop; } + if ((chan->mode == L2CAP_MODE_ERTM || + chan->mode == L2CAP_MODE_STREAMING) && sk_filter(chan->data, skb)) + goto drop; + if (!control->sframe) { int err; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 1842141..a8ba752 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1019,7 +1019,7 @@ static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg, goto done; if (pi->rx_busy_skb) { - if (!sock_queue_rcv_skb(sk, pi->rx_busy_skb)) + if (!__sock_queue_rcv_skb(sk, pi->rx_busy_skb)) pi->rx_busy_skb = NULL; else goto done; @@ -1270,7 +1270,17 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) goto done; } - err = sock_queue_rcv_skb(sk, skb); + if (chan->mode != L2CAP_MODE_ERTM && + chan->mode != L2CAP_MODE_STREAMING) { + /* Even if no filter is attached, we could potentially + * get errors from security modules, etc. + */ + err = sk_filter(sk, skb); + if (err) + goto done; + } + + err = __sock_queue_rcv_skb(sk, skb); /* For ERTM, handle one skb that doesn't fit into the recv * buffer. This is important to do because the data frames -- cgit v0.10.2 From 5dfd5e5e3bc68ab3912acc712c8180942094fc69 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 22 Aug 2016 18:16:37 -0500 Subject: i40iw: Add missing NULL check for MPA private data Add NULL check for pdata and pdata->addr before the memcpy in i40iw_form_cm_frame(). This fixes a NULL pointer de-reference which occurs when the MPA private data pointer is NULL. Also only copy pdata->size bytes in the memcpy to prevent reading past the length of the private data buffer provided by upper layer. Fixes: f27b4746f378 ("i40iw: add connection management code") Reported-by: Stefan Assmann Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 5026dc7..6434398 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -535,8 +535,8 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, buf += hdr_len; } - if (pd_len) - memcpy(buf, pdata->addr, pd_len); + if (pdata && pdata->addr) + memcpy(buf, pdata->addr, pdata->size); atomic_set(&sqbuf->refcount, 1); -- cgit v0.10.2 From 7eaf8313b1cfe93417a22bdc3f7380cac2a3dc6d Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 22 Aug 2016 19:01:47 -0500 Subject: i40iw: Do not set self-referencing pointer to NULL after kfree In i40iw_free_virt_mem(), do not set mem->va to NULL after freeing it as mem->va is a self-referencing pointer to mem. Fixes: 4e9042e647ff ("i40iw: add hw and utils files") Reported-by: Stefan Assmann Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 0e8db0a..6fd043b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -673,8 +673,11 @@ enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw, { if (!mem) return I40IW_ERR_PARAM; + /* + * mem->va points to the parent of mem, so both mem and mem->va + * can not be touched once mem->va is freed + */ kfree(mem->va); - mem->va = NULL; return 0; } -- cgit v0.10.2 From 82d200cc6fc817dbf049b67e5e3215eab427e846 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 23 Aug 2016 21:16:26 +0100 Subject: IB/mlx5: Remove superfluous include of io-mapping.h This file does not use any structs or functions defined by io-mapping.h (nor does it directly use iomap, ioremap, iounamp or friends). Remove it to simplify verification of changes to io-mapping.h The include existed since its inception in commit e126ba97dba9edeb6fafa3665b5f8497fc9cdf8c Author: Eli Cohen Date: Sun Jul 7 17:25:49 2013 +0300 mlx5: Add driver for Mellanox Connect-IB adapters which looks like a copy across from the Mellanox ethernet driver. Signed-off-by: Chris Wilson Cc: Eli Cohen Cc: Jack Morgenstein Cc: Or Gerlitz Cc: Matan Barak Cc: Leon Romanovsky Cc: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: linux-rdma@vger.kernel.org Reviewed-by: Leon Romanovsky Reviewed-by: Laurence Oberman Tested-by: Laurence Oberman Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a84bb76..1b4094b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -37,7 +37,6 @@ #include #include #include -#include #if defined(CONFIG_X86) #include #endif -- cgit v0.10.2 From d41d0910d97f05be987d2d60de7e8685c108963b Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 23 Aug 2016 16:50:13 -0500 Subject: i40iw: Fix double free of allocated_buffer Memory allocated for iwqp; iwqp->allocated_buffer is freed twice in the create_qp error path. Correct this by having it freed only once in i40iw_free_qp_resources(). Fixes: d37498417947 ("i40iw: add files for iwarp interface") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 2360338..722e5af 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -794,7 +794,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return &iwqp->ibqp; error: i40iw_free_qp_resources(iwdev, iwqp, qp_num); - kfree(mem); return ERR_PTR(err_code); } -- cgit v0.10.2 From 433c58139f6a7d59824aadd23d6c9cac1d4e6100 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 23 Aug 2016 17:24:56 -0500 Subject: i40iw: Avoid writing to freed memory iwpbl->iwmr points to the structure that contains iwpbl, which is iwmr. Setting this to NULL would result in writing to freed memory. So just free iwmr, and return. Fixes: d37498417947 ("i40iw: add files for iwarp interface") Reported-by: Stefan Assmann Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 722e5af..6329c97 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1925,8 +1925,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr) } if (iwpbl->pbl_allocated) i40iw_free_pble(iwdev->pble_rsrc, palloc); - kfree(iwpbl->iwmr); - iwpbl->iwmr = NULL; + kfree(iwmr); return 0; } -- cgit v0.10.2 From 3c199b4523c92dc5df027eeeffa657e2ccf453ab Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 24 Aug 2016 01:17:41 -0400 Subject: RDMA/ocrdma: Fix the max_sge reported from FW Current driver is reporting wrong values for max_sge and max_sge_rd in query_device. This breaks the nfs rdma and iser in some device profiles. Fixing the driver to report correct values from FW. Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 16740dc..67fc0b6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1156,18 +1156,18 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_srq = (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET; - attr->max_send_sge = ((rsp->max_write_send_sge & + attr->max_send_sge = ((rsp->max_recv_send_sge & OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT); - attr->max_recv_sge = (rsp->max_write_send_sge & - OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >> - OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT; + attr->max_recv_sge = (rsp->max_recv_send_sge & + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT; attr->max_srq_sge = (rsp->max_srq_rqe_sge & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET; - attr->max_rdma_sge = (rsp->max_write_send_sge & - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK) >> - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT; + attr->max_rdma_sge = (rsp->max_wr_rd_sge & + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT; attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp & OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 0efc966..37df448 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -554,9 +554,9 @@ enum { OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK = 0x18, OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT = 0, OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK = 0xFFFF, - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT = 16, - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK = 0xFFFF << - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT, + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT = 16, + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT, OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT = 0, OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK = 0xFFFF, @@ -612,6 +612,8 @@ enum { OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET = 0, OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK = 0xFFFF << OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET, + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT = 0, + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK = 0xFFFF, }; struct ocrdma_mbx_query_config { @@ -619,7 +621,7 @@ struct ocrdma_mbx_query_config { struct ocrdma_mbx_rsp rsp; u32 qp_srq_cq_ird_ord; u32 max_pd_ca_ack_delay; - u32 max_write_send_sge; + u32 max_recv_send_sge; u32 max_ird_ord_per_qp; u32 max_shared_ird_ord; u32 max_mr; @@ -639,6 +641,8 @@ struct ocrdma_mbx_query_config { u32 max_wqes_rqes_per_q; u32 max_cq_cqes_per_cq; u32 max_srq_rqe_sge; + u32 max_wr_rd_sge; + u32 ird_pgsz_num_pages; }; struct ocrdma_fw_ver_rsp { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index b1a3d91..0aa8547 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -125,8 +125,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; - attr->max_sge = dev->attr.max_send_sge; - attr->max_sge_rd = attr->max_sge; + attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge); + attr->max_sge_rd = dev->attr.max_rdma_sge; attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; attr->max_mr = dev->attr.max_mr; -- cgit v0.10.2 From f888f58795b640442165e60a6fa93e8e623d01a5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Aug 2016 11:18:51 +0200 Subject: mlxsw: spectrum: Add missing flood to router port In case we have a layer 3 interface on top of a bridge (VLAN / FID RIF), then we should flood the following packet types to the router: * Broadcast: If DIP is the broadcast address of the interface, then we need to be able to get it to CPU by trapping it following route lookup. * Reserved IP multicast (224.0.0.X): Some control packets (e.g. OSPF) use this range and are trapped in the router block. Fixes: 99f44bb3527b ("mlxsw: spectrum: Enable L3 interfaces on top of bridge devices") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h index f33b997..af371a8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/port.h +++ b/drivers/net/ethernet/mellanox/mlxsw/port.h @@ -56,6 +56,7 @@ #define MLXSW_PORT_PHY_BITS_MASK (MLXSW_PORT_MAX_PHY_PORTS - 1) #define MLXSW_PORT_CPU_PORT 0x0 +#define MLXSW_PORT_ROUTER_PORT (MLXSW_PORT_MAX_PHY_PORTS + 2) #define MLXSW_PORT_DONT_CARE (MLXSW_PORT_MAX_PORTS) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 1f81689..7291f2c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3324,6 +3324,39 @@ static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fid_find(mlxsw_sp, fid); } +static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid) +{ + return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID : + MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; +} + +static u16 mlxsw_sp_flood_table_index_get(u16 fid) +{ + return mlxsw_sp_fid_is_vfid(fid) ? mlxsw_sp_fid_to_vfid(fid) : fid; +} + +static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid, + bool set) +{ + enum mlxsw_flood_table_type table_type; + char *sftr_pl; + u16 index; + int err; + + sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); + if (!sftr_pl) + return -ENOMEM; + + table_type = mlxsw_sp_flood_table_type_get(fid); + index = mlxsw_sp_flood_table_index_get(fid); + mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, index, table_type, + 1, MLXSW_PORT_ROUTER_PORT, set); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); + + kfree(sftr_pl); + return err; +} + static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid) { if (mlxsw_sp_fid_is_vfid(fid)) @@ -3360,10 +3393,14 @@ static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, if (rif == MLXSW_SP_RIF_MAX) return -ERANGE; - err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true); + err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true); if (err) return err; + err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true); + if (err) + goto err_rif_bridge_op; + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true); if (err) goto err_rif_fdb_op; @@ -3385,6 +3422,8 @@ err_rif_alloc: mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); err_rif_fdb_op: mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false); +err_rif_bridge_op: + mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); return err; } @@ -3404,6 +3443,8 @@ void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false); + mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); + netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif); } -- cgit v0.10.2 From 51af96b53469f3b8cfcfe0504d0ff87239175b78 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 24 Aug 2016 11:18:52 +0200 Subject: mlxsw: router: Enable neighbors to be created on stacked devices Make the function mlxsw_router_neigh_construct search the rif according to the neighbour dev other than the dev that was passed to the ndo, thus allowing creating neigbhours upon stacked devices. Fixes: 6cf3c971dc84 ("mlxsw: spectrum_router: Add private neigh table") Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 90bb93b..917ddd1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -657,7 +657,7 @@ int mlxsw_sp_router_neigh_construct(struct net_device *dev, return 0; } - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev); if (WARN_ON(!r)) return -EINVAL; -- cgit v0.10.2 From 9a035a40f7f3f6708b79224b86c5777a3334f7ea Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 15 Aug 2016 09:02:38 -0600 Subject: xenbus: don't look up transaction IDs for ordinary writes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should really only be done for XS_TRANSACTION_END messages, or else at least some of the xenstore-* tools don't work anymore. Fixes: 0beef634b8 ("xenbus: don't BUG() on user mode induced condition") Reported-by: Richard Schütz Cc: Signed-off-by: Jan Beulich Tested-by: Richard Schütz Signed-off-by: David Vrabel diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 7487971..c1010f01 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -316,7 +316,7 @@ static int xenbus_write_transaction(unsigned msg_type, rc = -ENOMEM; goto out; } - } else { + } else if (msg_type == XS_TRANSACTION_END) { list_for_each_entry(trans, &u->transactions, list) if (trans->handle.id == u->u.msg.tx_id) break; -- cgit v0.10.2 From 55467dea2967259f21f4f854fc99d39cc5fea60e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 29 Jul 2016 11:06:48 +0200 Subject: xen: change the type of xen_vcpu_id to uint32_t We pass xen_vcpu_id mapping information to hypercalls which require uint32_t type so it would be cleaner to have it as uint32_t. The initializer to -1 can be dropped as we always do the mapping before using it and we never check the 'not set' value anyway. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David Vrabel diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index b0b82f5..3d2cef6 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -50,7 +50,7 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); static struct vcpu_info __percpu *xen_vcpu_info; /* Linux <-> Xen vCPU id mapping */ -DEFINE_PER_CPU(int, xen_vcpu_id) = -1; +DEFINE_PER_CPU(uint32_t, xen_vcpu_id); EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); /* These are unused until we support booting "pre-ballooned" */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 8ffb089..b86ebb1 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -118,7 +118,7 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); /* Linux <-> Xen vCPU id mapping */ -DEFINE_PER_CPU(int, xen_vcpu_id) = -1; +DEFINE_PER_CPU(uint32_t, xen_vcpu_id); EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); enum xen_domain_type xen_domain_type = XEN_NATIVE; diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 9a37c541..b5486e6 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -9,8 +9,8 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); -DECLARE_PER_CPU(int, xen_vcpu_id); -static inline int xen_vcpu_nr(int cpu) +DECLARE_PER_CPU(uint32_t, xen_vcpu_id); +static inline uint32_t xen_vcpu_nr(int cpu) { return per_cpu(xen_vcpu_id, cpu); } -- cgit v0.10.2 From 486b0f7bcd64be027535811ef44195bc1027fbd3 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 19 Aug 2016 15:34:01 -0700 Subject: r5cache: set MD_JOURNAL_CLEAN correctly Currently, the code sets MD_JOURNAL_CLEAN when the array has MD_FEATURE_JOURNAL and the recovery_cp is MaxSector. The array will be MD_JOURNAL_CLEAN even if the journal device is missing. With this patch, the MD_JOURNAL_CLEAN is only set when the journal device presents. Signed-off-by: Song Liu Signed-off-by: Shaohua Li diff --git a/drivers/md/md.c b/drivers/md/md.c index cc25cbc..4f6cf3b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1604,11 +1604,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) mddev->new_chunk_sectors = mddev->chunk_sectors; } - if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) { + if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) set_bit(MD_HAS_JOURNAL, &mddev->flags); - if (mddev->recovery_cp == MaxSector) - set_bit(MD_JOURNAL_CLEAN, &mddev->flags); - } } else if (mddev->pers == NULL) { /* Insist of good event counter while assembling, except for * spares (which don't need an event count) */ diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4f8f524..2119e09 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6840,11 +6840,14 @@ static int raid5_run(struct mddev *mddev) if (IS_ERR(conf)) return PTR_ERR(conf); - if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !journal_dev) { - printk(KERN_ERR "md/raid:%s: journal disk is missing, force array readonly\n", - mdname(mddev)); - mddev->ro = 1; - set_disk_ro(mddev->gendisk, 1); + if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { + if (!journal_dev) { + pr_err("md/raid:%s: journal disk is missing, force array readonly\n", + mdname(mddev)); + mddev->ro = 1; + set_disk_ro(mddev->gendisk, 1); + } else if (mddev->recovery_cp == MaxSector) + set_bit(MD_JOURNAL_CLEAN, &mddev->flags); } conf->min_offset_diff = min_offset_diff; -- cgit v0.10.2 From 0f6187dbe542d71ace8ba0908954b0f4f8a30a1e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 21 Aug 2016 14:42:25 +0000 Subject: md-cluster: fix error return code in join() Fix to return error code -ENOMEM from the lockres_init() error handling case instead of 0, as done elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Shaohua Li diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 41573f1..34a840d 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -834,8 +834,10 @@ static int join(struct mddev *mddev, int nodes) goto err; } cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0); - if (!cinfo->ack_lockres) + if (!cinfo->ack_lockres) { + ret = -ENOMEM; goto err; + } /* get sync CR lock on ACK. */ if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR)) pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n", @@ -849,8 +851,10 @@ static int join(struct mddev *mddev, int nodes) pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number); snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1); cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1); - if (!cinfo->bitmap_lockres) + if (!cinfo->bitmap_lockres) { + ret = -ENOMEM; goto err; + } if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) { pr_err("Failed to get bitmap lock\n"); ret = -EINVAL; @@ -858,8 +862,10 @@ static int join(struct mddev *mddev, int nodes) } cinfo->resync_lockres = lockres_init(mddev, "resync", NULL, 0); - if (!cinfo->resync_lockres) + if (!cinfo->resync_lockres) { + ret = -ENOMEM; goto err; + } return 0; err: -- cgit v0.10.2 From 27028626b4b9022dcac23688e09ea43b36e1183c Mon Sep 17 00:00:00 2001 From: Tomasz Majchrzak Date: Tue, 23 Aug 2016 10:53:57 +0200 Subject: raid10: record correct address of bad block For failed write request record block address on a device, not block address in an array. Signed-off-by: Tomasz Majchrzak Signed-off-by: Shaohua Li diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1a632a8..4589866 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2465,20 +2465,21 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) while (sect_to_write) { struct bio *wbio; + sector_t wsector; if (sectors > sect_to_write) sectors = sect_to_write; /* Write at 'sector' for 'sectors' */ wbio = bio_clone_mddev(bio, GFP_NOIO, mddev); bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors); - wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+ - choose_data_offset(r10_bio, rdev) + - (sector - r10_bio->sector)); + wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector); + wbio->bi_iter.bi_sector = wsector + + choose_data_offset(r10_bio, rdev); wbio->bi_bdev = rdev->bdev; bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); if (submit_bio_wait(wbio) < 0) /* Failure! */ - ok = rdev_set_badblocks(rdev, sector, + ok = rdev_set_badblocks(rdev, wsector, sectors, 0) && ok; -- cgit v0.10.2 From 5f9d1fde7d54a5d5fd8cccbee9c9c31474fcdcf2 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 22 Aug 2016 21:14:01 -0700 Subject: raid5: fix memory leak of bio integrity data Yi reported a memory leak of raid5 with DIF/DIX enabled disks. raid5 doesn't alloc/free bio, instead it reuses bios. There are two issues in current code: 1. the code calls bio_init (from init_stripe->raid5_build_block->bio_init) then bio_reset (ops_run_io). The bio is reused, so likely there is integrity data attached. bio_init will clear a pointer to integrity data and makes bio_reset can't release the data 2. bio_reset is called before dispatching bio. After bio is finished, it's possible we don't free bio's integrity data (eg, we don't call bio_reset again) Both issues will cause memory leak. The patch moves bio_init to stripe creation and bio_reset to bio end io. This will fix the two issues. Reported-by: Yi Zhang Signed-off-by: Shaohua Li diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2119e09..d1a279b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1005,7 +1005,6 @@ again: set_bit(STRIPE_IO_STARTED, &sh->state); - bio_reset(bi); bi->bi_bdev = rdev->bdev; bio_set_op_attrs(bi, op, op_flags); bi->bi_end_io = op_is_write(op) @@ -1057,7 +1056,6 @@ again: set_bit(STRIPE_IO_STARTED, &sh->state); - bio_reset(rbi); rbi->bi_bdev = rrdev->bdev; bio_set_op_attrs(rbi, op, op_flags); BUG_ON(!op_is_write(op)); @@ -1990,9 +1988,11 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) put_cpu(); } -static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp) +static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, + int disks) { struct stripe_head *sh; + int i; sh = kmem_cache_zalloc(sc, gfp); if (sh) { @@ -2001,6 +2001,12 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp) INIT_LIST_HEAD(&sh->batch_list); INIT_LIST_HEAD(&sh->lru); atomic_set(&sh->count, 1); + for (i = 0; i < disks; i++) { + struct r5dev *dev = &sh->dev[i]; + + bio_init(&dev->req); + bio_init(&dev->rreq); + } } return sh; } @@ -2008,7 +2014,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) { struct stripe_head *sh; - sh = alloc_stripe(conf->slab_cache, gfp); + sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size); if (!sh) return 0; @@ -2179,7 +2185,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) mutex_lock(&conf->cache_size_mutex); for (i = conf->max_nr_stripes; i; i--) { - nsh = alloc_stripe(sc, GFP_KERNEL); + nsh = alloc_stripe(sc, GFP_KERNEL, newsize); if (!nsh) break; @@ -2311,6 +2317,7 @@ static void raid5_end_read_request(struct bio * bi) (unsigned long long)sh->sector, i, atomic_read(&sh->count), bi->bi_error); if (i == disks) { + bio_reset(bi); BUG(); return; } @@ -2414,6 +2421,7 @@ static void raid5_end_read_request(struct bio * bi) clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); raid5_release_stripe(sh); + bio_reset(bi); } static void raid5_end_write_request(struct bio *bi) @@ -2448,6 +2456,7 @@ static void raid5_end_write_request(struct bio *bi) (unsigned long long)sh->sector, i, atomic_read(&sh->count), bi->bi_error); if (i == disks) { + bio_reset(bi); BUG(); return; } @@ -2491,18 +2500,17 @@ static void raid5_end_write_request(struct bio *bi) if (sh->batch_head && sh != sh->batch_head) raid5_release_stripe(sh->batch_head); + bio_reset(bi); } static void raid5_build_block(struct stripe_head *sh, int i, int previous) { struct r5dev *dev = &sh->dev[i]; - bio_init(&dev->req); dev->req.bi_io_vec = &dev->vec; dev->req.bi_max_vecs = 1; dev->req.bi_private = sh; - bio_init(&dev->rreq); dev->rreq.bi_io_vec = &dev->rvec; dev->rreq.bi_max_vecs = 1; dev->rreq.bi_private = sh; -- cgit v0.10.2 From 45c91d808ff989d950e260dab9f89e8f4a3c9c2c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 22 Aug 2016 21:14:02 -0700 Subject: raid5: avoid unnecessary bio data set bio_reset doesn't change bi_io_vec and bi_max_vecs, so we don't need to set them every time. bi_private will be set before the bio is dispatched. Signed-off-by: Shaohua Li diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d1a279b..62febe8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2005,7 +2005,12 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, struct r5dev *dev = &sh->dev[i]; bio_init(&dev->req); + dev->req.bi_io_vec = &dev->vec; + dev->req.bi_max_vecs = 1; + bio_init(&dev->rreq); + dev->rreq.bi_io_vec = &dev->rvec; + dev->rreq.bi_max_vecs = 1; } } return sh; @@ -2507,14 +2512,6 @@ static void raid5_build_block(struct stripe_head *sh, int i, int previous) { struct r5dev *dev = &sh->dev[i]; - dev->req.bi_io_vec = &dev->vec; - dev->req.bi_max_vecs = 1; - dev->req.bi_private = sh; - - dev->rreq.bi_io_vec = &dev->rvec; - dev->rreq.bi_max_vecs = 1; - dev->rreq.bi_private = sh; - dev->flags = 0; dev->sector = raid5_compute_blocknr(sh, i, previous); } -- cgit v0.10.2 From af7c388a9c2e5fdd36da6eaaa35fb86fb8aefd0b Mon Sep 17 00:00:00 2001 From: Vince Hsu Date: Wed, 24 Aug 2016 15:56:56 +0200 Subject: clk: tegra: remove TEGRA_PLL_USE_LOCK for PLLD/PLLD2 Tegra114 has a HW bug that the PLLD/PLLD2 lock bit cannot be asserted when the DIS power domain is during up-powergating process but the clamp to this domain is not removed yet. That causes a timeout and aborts the power sequence, although the PLLD/PLLD2 has already locked. To remove the false alarm, we don't use the lock for PLLD/PLLD2. Just wait 1ms and treat the clocks as locked. Signed-off-by: Vince Hsu Tested-by: Jonathan Hunter Signed-off-by: Thierry Reding Signed-off-by: Stephen Boyd diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c index 64da7b7..933b5dd 100644 --- a/drivers/clk/tegra/clk-tegra114.c +++ b/drivers/clk/tegra/clk-tegra114.c @@ -428,7 +428,7 @@ static struct tegra_clk_pll_params pll_d_params = { .div_nmp = &pllp_nmp, .freq_table = pll_d_freq_table, .flags = TEGRA_PLL_HAS_CPCON | TEGRA_PLL_SET_LFCON | - TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE, + TEGRA_PLL_HAS_LOCK_ENABLE, }; static struct tegra_clk_pll_params pll_d2_params = { @@ -446,7 +446,7 @@ static struct tegra_clk_pll_params pll_d2_params = { .div_nmp = &pllp_nmp, .freq_table = pll_d_freq_table, .flags = TEGRA_PLL_HAS_CPCON | TEGRA_PLL_SET_LFCON | - TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE, + TEGRA_PLL_HAS_LOCK_ENABLE, }; static const struct pdiv_map pllu_p[] = { -- cgit v0.10.2 From e1718d97aa88ea44a6a8f50ff464253dd0dacf01 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Aug 2016 12:31:36 -0400 Subject: drm/amdgpu: avoid a possible array overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When looking up the connector type make sure the index is valid. Avoids a later crash if we read past the end of the array. Workaround for bug: https://bugs.freedesktop.org/show_bug.cgi?id=97460 Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 9831753..1514223 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -321,6 +321,12 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * (le16_to_cpu(path->usConnObjectId) & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; + if (con_obj_id >= ARRAY_SIZE(object_connector_convert)) { + DRM_ERROR("invalid con_obj_id %d for device tag 0x%04x\n", + con_obj_id, le16_to_cpu(path->usDeviceTag)); + continue; + } + connector_type = object_connector_convert[con_obj_id]; connector_object_id = con_obj_id; -- cgit v0.10.2 From 611a1507fe8569ce1adab3abc982ea58ab559fb9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Aug 2016 13:04:15 -0400 Subject: drm/amdgpu: skip TV/CV in display parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No asics supported by amdgpu support analog TV. Workaround for bug: https://bugs.freedesktop.org/show_bug.cgi?id=97460 Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 1514223..fe872b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -321,6 +321,13 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * (le16_to_cpu(path->usConnObjectId) & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; + /* Skip TV/CV support */ + if ((le16_to_cpu(path->usDeviceTag) == + ATOM_DEVICE_TV1_SUPPORT) || + (le16_to_cpu(path->usDeviceTag) == + ATOM_DEVICE_CV_SUPPORT)) + continue; + if (con_obj_id >= ARRAY_SIZE(object_connector_convert)) { DRM_ERROR("invalid con_obj_id %d for device tag 0x%04x\n", con_obj_id, le16_to_cpu(path->usDeviceTag)); -- cgit v0.10.2 From 716b076ba4b273f5f85c97448c5110c6d21e73e6 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Wed, 24 Aug 2016 12:14:19 -0400 Subject: IB/srpt: Update sport->port_guid with each port refresh If port_guid is set with the default subnet_prefix, then we get a change event and run a port refresh, we don't update the port_guid. As a result, attempts to create a target device that uses the new subnet_prefix in the wwn will fail to find a match and be rejected by the ib_srpt driver. This makes it impossible to configure a port if it was initialized with a default subnet_prefix and later changed to any non-default subnet-prefix. Updating the port refresh task to always update the wwn based upon the current subnext_prefix solves this problem. Cc: Bart Van Assche Cc: nab@linux-iscsi.org Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index dfa23b0..883bbfe 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -522,6 +522,11 @@ static int srpt_refresh_port(struct srpt_port *sport) if (ret) goto err_query_port; + snprintf(sport->port_guid, sizeof(sport->port_guid), + "0x%016llx%016llx", + be64_to_cpu(sport->gid.global.subnet_prefix), + be64_to_cpu(sport->gid.global.interface_id)); + if (!sport->mad_agent) { memset(®_req, 0, sizeof(reg_req)); reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; @@ -2548,10 +2553,6 @@ static void srpt_add_one(struct ib_device *device) sdev->device->name, i); goto err_ring; } - snprintf(sport->port_guid, sizeof(sport->port_guid), - "0x%016llx%016llx", - be64_to_cpu(sport->gid.global.subnet_prefix), - be64_to_cpu(sport->gid.global.interface_id)); } spin_lock(&srpt_dev_lock); -- cgit v0.10.2 From e57690fe009b2ab0cee8a57f53be634540e49c9d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Aug 2016 15:34:35 -0600 Subject: blk-mq: don't overwrite rq->mq_ctx We do this in a few places, if the CPU is offline. This isn't allowed, though, since on multi queue hardware, we can't just move a request from one software queue to another, if they map to different hardware queues. The request and tag isn't valid on another hardware queue. This can happen if plugging races with CPU offlining. But it does no harm, since it can only happen in the window where we are currently busy freezing the queue and flushing IO, in preparation for redoing the software <-> hardware queue mappings. Signed-off-by: Jens Axboe diff --git a/block/blk-mq.c b/block/blk-mq.c index e931a0e..729169d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1036,10 +1036,11 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) EXPORT_SYMBOL(blk_mq_delay_queue); static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx, - struct blk_mq_ctx *ctx, struct request *rq, bool at_head) { + struct blk_mq_ctx *ctx = rq->mq_ctx; + trace_block_rq_insert(hctx->queue, rq); if (at_head) @@ -1053,20 +1054,16 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, { struct blk_mq_ctx *ctx = rq->mq_ctx; - __blk_mq_insert_req_list(hctx, ctx, rq, at_head); + __blk_mq_insert_req_list(hctx, rq, at_head); blk_mq_hctx_mark_pending(hctx, ctx); } void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, - bool async) + bool async) { + struct blk_mq_ctx *ctx = rq->mq_ctx; struct request_queue *q = rq->q; struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx = rq->mq_ctx, *current_ctx; - - current_ctx = blk_mq_get_ctx(q); - if (!cpu_online(ctx->cpu)) - rq->mq_ctx = ctx = current_ctx; hctx = q->mq_ops->map_queue(q, ctx->cpu); @@ -1076,8 +1073,6 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, if (run_queue) blk_mq_run_hw_queue(hctx, async); - - blk_mq_put_ctx(current_ctx); } static void blk_mq_insert_requests(struct request_queue *q, @@ -1088,14 +1083,9 @@ static void blk_mq_insert_requests(struct request_queue *q, { struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *current_ctx; trace_block_unplug(q, depth, !from_schedule); - current_ctx = blk_mq_get_ctx(q); - - if (!cpu_online(ctx->cpu)) - ctx = current_ctx; hctx = q->mq_ops->map_queue(q, ctx->cpu); /* @@ -1107,15 +1097,14 @@ static void blk_mq_insert_requests(struct request_queue *q, struct request *rq; rq = list_first_entry(list, struct request, queuelist); + BUG_ON(rq->mq_ctx != ctx); list_del_init(&rq->queuelist); - rq->mq_ctx = ctx; - __blk_mq_insert_req_list(hctx, ctx, rq, false); + __blk_mq_insert_req_list(hctx, rq, false); } blk_mq_hctx_mark_pending(hctx, ctx); spin_unlock(&ctx->lock); blk_mq_run_hw_queue(hctx, from_schedule); - blk_mq_put_ctx(current_ctx); } static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b) @@ -1630,16 +1619,17 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node) return 0; } +/* + * 'cpu' is going away. splice any existing rq_list entries from this + * software queue to the hw queue dispatch list, and ensure that it + * gets run. + */ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu) { - struct request_queue *q = hctx->queue; struct blk_mq_ctx *ctx; LIST_HEAD(tmp); - /* - * Move ctx entries to new CPU, if this one is going away. - */ - ctx = __blk_mq_get_ctx(q, cpu); + ctx = __blk_mq_get_ctx(hctx->queue, cpu); spin_lock(&ctx->lock); if (!list_empty(&ctx->rq_list)) { @@ -1651,24 +1641,11 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu) if (list_empty(&tmp)) return NOTIFY_OK; - ctx = blk_mq_get_ctx(q); - spin_lock(&ctx->lock); - - while (!list_empty(&tmp)) { - struct request *rq; - - rq = list_first_entry(&tmp, struct request, queuelist); - rq->mq_ctx = ctx; - list_move_tail(&rq->queuelist, &ctx->rq_list); - } - - hctx = q->mq_ops->map_queue(q, ctx->cpu); - blk_mq_hctx_mark_pending(hctx, ctx); - - spin_unlock(&ctx->lock); + spin_lock(&hctx->lock); + list_splice_tail_init(&tmp, &hctx->dispatch); + spin_unlock(&hctx->lock); blk_mq_run_hw_queue(hctx, true); - blk_mq_put_ctx(ctx); return NOTIFY_OK; } -- cgit v0.10.2 From 0e87e58bf60edb6bb28e493c7a143f41b091a5e5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Aug 2016 15:38:01 -0600 Subject: blk-mq: improve warning for running a queue on the wrong CPU __blk_mq_run_hw_queue() currently warns if we are running the queue on a CPU that isn't set in its mask. However, this can happen if a CPU is being offlined, and the workqueue handling will place the work on CPU0 instead. Improve the warning so that it only triggers if the batch cpu in the hardware queue is currently online. If it triggers for that case, then it's indicative of a flow problem in blk-mq, so we want to retain it for that case. Signed-off-by: Jens Axboe diff --git a/block/blk-mq.c b/block/blk-mq.c index 729169d..13f5a6c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -793,11 +793,12 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) struct list_head *dptr; int queued; - WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)); - if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) return; + WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) && + cpu_online(hctx->next_cpu)); + hctx->run++; /* -- cgit v0.10.2 From a45f9d41c9dd2c28e38b9b88f69c39bc63807de9 Mon Sep 17 00:00:00 2001 From: Xing Zheng Date: Wed, 24 Aug 2016 11:29:39 -0700 Subject: clk: rockchip: mark aclk_emmc_noc as a critical clock on rk3399 We don't have code to handle any of the noc clocks in rk3399 and they're all just listed as critical clocks. Let's do the same for aclk_emmc_noc. Without this clock being marked as critical we have problems around suspend/resume after commit 20c389e656a8 ("clk: rockchip: fix incorrect aclk_emmc source gate bits on rk3399"). Before that change we were presumably not actually gating any of these clocks because we were setting the wrong gate. Fixes: 20c389e656a8 ("clk: rockchip: fix incorrect aclk_emmc source gate bits on rk3399") Signed-off-by: Xing Zheng Signed-off-by: Douglas Anderson Signed-off-by: Heiko Stuebner diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c index ec5b2fd..cdfabeb 100644 --- a/drivers/clk/rockchip/clk-rk3399.c +++ b/drivers/clk/rockchip/clk-rk3399.c @@ -1484,6 +1484,7 @@ static const char *const rk3399_cru_critical_clocks[] __initconst = { "hclk_perilp1", "hclk_perilp1_noc", "aclk_dmac0_perilp", + "aclk_emmc_noc", "gpll_hclk_perilp1_src", "gpll_aclk_perilp0_src", "gpll_aclk_perihp_src", -- cgit v0.10.2 From 299f6230bc6d0ccd5f95bb0fb865d80a9c7d5ccc Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 24 Aug 2016 21:12:58 -0400 Subject: dm flakey: fix reads to be issued if drop_writes configured v4.8-rc3 commit 99f3c90d0d ("dm flakey: error READ bios during the down_interval") overlooked the 'drop_writes' feature, which is meant to allow reads to be issued rather than errored, during the down_interval. Fixes: 99f3c90d0d ("dm flakey: error READ bios during the down_interval") Reported-by: Qu Wenruo Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 97e446d..6a2e8dd 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -289,15 +289,13 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) pb->bio_submitted = true; /* - * Map reads as normal only if corrupt_bio_byte set. + * Error reads if neither corrupt_bio_byte or drop_writes are set. + * Otherwise, flakey_end_io() will decide if the reads should be modified. */ if (bio_data_dir(bio) == READ) { - /* If flags were specified, only corrupt those that match. */ - if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) && - all_corrupt_bio_flags_match(bio, fc)) - goto map_bio; - else + if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags)) return -EIO; + goto map_bio; } /* @@ -334,14 +332,21 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) struct flakey_c *fc = ti->private; struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); - /* - * Corrupt successful READs while in down state. - */ if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) { - if (fc->corrupt_bio_byte) + if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) && + all_corrupt_bio_flags_match(bio, fc)) { + /* + * Corrupt successful matching READs while in down state. + */ corrupt_bio_data(bio, fc); - else + + } else if (!test_bit(DROP_WRITES, &fc->flags)) { + /* + * Error read during the down_interval if drop_writes + * wasn't configured. + */ return -EIO; + } } return error; -- cgit v0.10.2 From 9c5a559d9495bba6e5b6c5ee4e8e2f2b71088684 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 23 Aug 2016 21:17:48 +0200 Subject: dm log: fix unitialized bio operation flags Commit e6047149db ("dm: use bio op accessors") switched DM over to using bio_set_op_attrs() but didn't take care to initialize lc->io_req.bi_op_flags in dm-log.c:rw_header(). This caused rw_header()'s call to dm_io() to make bio->bi_op_flags be uninitialized in dm-io.c:do_region(), which ultimately resulted in a SCSI BUG() in sd_init_command(). Also, adjust rw_header() and its callers to use REQ_OP_{READ|WRITE}. Fixes: e6047149db ("dm: use bio op accessors") Signed-off-by: Heinz Mauelshagen Reviewed-by: Shaun Tancheff Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 4ca2d1d..07fc1ad 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -291,9 +291,10 @@ static void header_from_disk(struct log_header_core *core, struct log_header_dis core->nr_regions = le64_to_cpu(disk->nr_regions); } -static int rw_header(struct log_c *lc, int rw) +static int rw_header(struct log_c *lc, int op) { - lc->io_req.bi_op = rw; + lc->io_req.bi_op = op; + lc->io_req.bi_op_flags = 0; return dm_io(&lc->io_req, 1, &lc->header_location, NULL); } @@ -316,7 +317,7 @@ static int read_header(struct log_c *log) { int r; - r = rw_header(log, READ); + r = rw_header(log, REQ_OP_READ); if (r) return r; @@ -630,7 +631,7 @@ static int disk_resume(struct dm_dirty_log *log) header_to_disk(&lc->header, lc->disk_header); /* write the new header */ - r = rw_header(lc, WRITE); + r = rw_header(lc, REQ_OP_WRITE); if (!r) { r = flush_header(lc); if (r) @@ -698,7 +699,7 @@ static int disk_flush(struct dm_dirty_log *log) log_clear_bit(lc, lc->clean_bits, i); } - r = rw_header(lc, WRITE); + r = rw_header(lc, REQ_OP_WRITE); if (r) fail_log_device(lc); else { -- cgit v0.10.2 From 16590a228109e2f318d2cc6466221134cfab723a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 22 Aug 2016 14:57:42 -0400 Subject: SUNRPC: Silence WARN_ON when NFSv4.1 over RDMA is in use Using NFSv4.1 on RDMA should be safe, so broaden the new checks in rpc_create(). WARN_ON_ONCE is used, matching most other WARN call sites in clnt.c. Fixes: 39a9beab5acb ("rpc: share one xps between all backchannels") Fixes: d50039ea5ee6 ("nfsd4/rpc: move backchannel create logic...") Signed-off-by: Chuck Lever Reviewed-by: J. Bruce Fields Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7f79fb7..66f23b3 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -453,7 +453,7 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, struct rpc_xprt_switch *xps; if (args->bc_xprt && args->bc_xprt->xpt_bc_xps) { - WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP); + WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC)); xps = args->bc_xprt->xpt_bc_xps; xprt_switch_get(xps); } else { @@ -520,7 +520,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) char servername[48]; if (args->bc_xprt) { - WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP); + WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC)); xprt = args->bc_xprt->xpt_bc_xprt; if (xprt) { xprt_get(xprt); -- cgit v0.10.2 From 23fd537c9508fb6e3b93ddf23982f51afc087781 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 24 Aug 2016 14:33:27 +0300 Subject: usb: gadget: udc: core: don't starve DMA resources Always unmap all SG entries as required by DMA API Fixes: a698908d3b3b ("usb: gadget: add generic map/unmap request utilities") Cc: # v3.4+ Signed-off-by: Felipe Balbi diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 934f838..40c04bb 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -827,7 +827,7 @@ void usb_gadget_unmap_request_by_dev(struct device *dev, return; if (req->num_mapped_sgs) { - dma_unmap_sg(dev, req->sg, req->num_mapped_sgs, + dma_unmap_sg(dev, req->sg, req->num_sgs, is_in ? DMA_TO_DEVICE : DMA_FROM_DEVICE); req->num_mapped_sgs = 0; -- cgit v0.10.2 From 696fe69d7e631f00f23b0ef1694d9b90058dca54 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 24 Aug 2016 14:32:39 +0300 Subject: usb: dwc3: debug: fix ep name on trace output There was a typo when generating endpoint name which would be very confusing when debugging. Fix it. Signed-off-by: Felipe Balbi diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h index 22dfc3d..33ab2a2 100644 --- a/drivers/usb/dwc3/debug.h +++ b/drivers/usb/dwc3/debug.h @@ -192,7 +192,7 @@ dwc3_ep_event_string(const struct dwc3_event_depevt *event) int ret; ret = sprintf(str, "ep%d%s: ", epnum >> 1, - (epnum & 1) ? "in" : "in"); + (epnum & 1) ? "in" : "out"); if (ret < 0) return "UNKNOWN"; -- cgit v0.10.2 From 6f8245b4e37c2072d3daea24e19dbc0162ffd22c Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 24 Aug 2016 14:40:13 +0300 Subject: usb: dwc3: gadget: always decrement by 1 We need to decrement in both cases (enq > deq and enq < deq) Signed-off-by: Felipe Balbi diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 122e64d..7a8d3d8 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -884,12 +884,9 @@ static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep) return DWC3_TRB_NUM - 1; } - trbs_left = dep->trb_dequeue - dep->trb_enqueue; + trbs_left = dep->trb_dequeue - dep->trb_enqueue - 1; trbs_left &= (DWC3_TRB_NUM - 1); - if (dep->trb_dequeue < dep->trb_enqueue) - trbs_left--; - return trbs_left; } -- cgit v0.10.2 From fd363bd417ddb6103564c69cfcbd92d9a7877431 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 24 Aug 2016 18:02:08 +0100 Subject: arm64: avoid TLB conflict with CONFIG_RANDOMIZE_BASE When CONFIG_RANDOMIZE_BASE is selected, we modify the page tables to remap the kernel at a newly-chosen VA range. We do this with the MMU disabled, but do not invalidate TLBs prior to re-enabling the MMU with the new tables. Thus the old mappings entries may still live in TLBs, and we risk violating Break-Before-Make requirements, leading to TLB conflicts and/or other issues. We invalidate TLBs when we uninsall the idmap in early setup code, but prior to this we are subject to issues relating to the Break-Before-Make violation. Avoid these issues by invalidating the TLBs before the new mappings can be used by the hardware. Fixes: f80fb3a3d508 ("arm64: add support for kernel ASLR") Cc: # 4.6+ Acked-by: Ard Biesheuvel Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index b77f583..3e7b050 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -757,6 +757,9 @@ ENTRY(__enable_mmu) isb bl __create_page_tables // recreate kernel mapping + tlbi vmalle1 // Remove any stale TLB entries + dsb nsh + msr sctlr_el1, x19 // re-enable the MMU isb ic iallu // flush instructions fetched -- cgit v0.10.2 From 1c1ea4f781db9f754842b9c31d1eff400d17cddc Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 19 Jul 2016 15:36:05 -0700 Subject: Btrfs: fix memory leak of reloc_root When some critical errors occur and FS would be flipped into RO, if we have an on-going balance, we can end up with a memory leak of root->reloc_root since btrfs_drop_snapshots() bails out without freeing reloc_root at the very early start. However, we're not able to free reloc_root in btrfs_drop_snapshots() because its caller, merge_reloc_roots(), still needs to access it to cleanup reloc_root's rbtree. This makes us free reloc_root when we're going to free fs/file roots. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ff2362d..4a34c9f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3744,8 +3744,15 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, if (btrfs_root_refs(&root->root_item) == 0) synchronize_srcu(&fs_info->subvol_srcu); - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { btrfs_free_log(NULL, root); + if (root->reloc_root) { + free_extent_buffer(root->reloc_root->node); + free_extent_buffer(root->reloc_root->commit_root); + btrfs_put_fs_root(root->reloc_root); + root->reloc_root = NULL; + } + } if (root->free_ino_pinned) __btrfs_remove_free_space_cache(root->free_ino_pinned); -- cgit v0.10.2 From d8422ba334f9df16e071bc77707e55fd7f8446ae Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Jul 2016 15:04:18 +0800 Subject: btrfs: backref: Fix soft lockup in __merge_refs function When over 1000 file extents refers to one extent, find_parent_nodes() will be obviously slow, due to the O(n^2)~O(n^3) loops inside __merge_refs(). The following ftrace shows the cubic growth of execution time: 256 refs 5) + 91.768 us | __add_keyed_refs.isra.12 [btrfs](); 5) 1.447 us | __add_missing_keys.isra.13 [btrfs](); 5) ! 114.544 us | __merge_refs [btrfs](); 5) ! 136.399 us | __merge_refs [btrfs](); 512 refs 6) ! 279.859 us | __add_keyed_refs.isra.12 [btrfs](); 6) 3.164 us | __add_missing_keys.isra.13 [btrfs](); 6) ! 442.498 us | __merge_refs [btrfs](); 6) # 2091.073 us | __merge_refs [btrfs](); and 1024 refs 7) ! 368.683 us | __add_keyed_refs.isra.12 [btrfs](); 7) 4.810 us | __add_missing_keys.isra.13 [btrfs](); 7) # 2043.428 us | __merge_refs [btrfs](); 7) * 18964.23 us | __merge_refs [btrfs](); And sort them into the following char: (Unit: us) ------------------------------------------------------------------------ Trace function | 256 ref | 512 refs | 1024 refs | ------------------------------------------------------------------------ __add_keyed_refs | 91 | 249 | 368 | __add_missing_keys | 1 | 3 | 4 | __merge_refs 1st call | 114 | 442 | 2043 | __merge_refs 2nd call | 136 | 2091 | 18964 | ------------------------------------------------------------------------ We can see the that __add_keyed_refs() grows almost in linear behavior. And __add_missing_keys() in this case doesn't change much or takes much time. While for the 1st __merge_refs() it's square growth for the 2nd __merge_refs() call it's cubic growth. It's no doubt that merge_refs() will take a long long time to execute if the number of refs continues its grows. So add a cond_resced() into the loop of __merge_refs(). Although this will solve the problem of soft lockup, we need to use the new rb_tree based structure introduced by Lu Fengqi to really solve the long execution time. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 2b88439..455a6b2 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -589,6 +589,7 @@ static void __merge_refs(struct list_head *head, int mode) list_del(&ref2->list); kmem_cache_free(btrfs_prelim_ref_cache, ref2); + cond_resched(); } } -- cgit v0.10.2 From f3bca8028bd934e96257b8bd1143e6474fe98465 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 20 Jul 2016 17:33:44 -0700 Subject: Btrfs: add ASSERT for block group's memory leak This adds several ASSERT()' s to report memory leak of block group cache. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c2b81b0..e962b0e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9942,6 +9942,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info) block_group->iref = 0; block_group->inode = NULL; spin_unlock(&block_group->lock); + ASSERT(block_group->io_ctl.inode == NULL); iput(inode); last = block_group->key.objectid + block_group->key.offset; btrfs_put_block_group(block_group); @@ -9999,6 +10000,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) free_excluded_extents(info->extent_root, block_group); btrfs_remove_free_space_cache(block_group); + ASSERT(list_empty(&block_group->dirty_list)); + ASSERT(list_empty(&block_group->io_list)); + ASSERT(list_empty(&block_group->bg_list)); + ASSERT(atomic_read(&block_group->count) == 1); btrfs_put_block_group(block_group); spin_lock(&info->block_group_cache_lock); -- cgit v0.10.2 From eecba891d38051ebf7f4af6394d188a5fd151a6a Mon Sep 17 00:00:00 2001 From: Alex Lyakas Date: Sun, 6 Dec 2015 12:32:31 +0200 Subject: btrfs: flush_space: treat return value of do_chunk_alloc properly do_chunk_alloc returns 1 when it succeeds to allocate a new chunk. But flush_space will not convert this to 0, and will also return 1. As a result, reserve_metadata_bytes will think that flush_space failed, and may potentially return this value "1" to the caller (depends how reserve_metadata_bytes was called). The caller will also treat this as an error. For example, btrfs_block_rsv_refill does: int ret = -ENOSPC; ... ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); if (!ret) { block_rsv_add_bytes(block_rsv, num_bytes, 0); return 0; } return ret; So it will return -ENOSPC. Signed-off-by: Alex Lyakas Reviewed-by: Josef Bacik Reviewed-by: Liu Bo Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e962b0e..d2cc16f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4882,7 +4882,7 @@ static int flush_space(struct btrfs_root *root, btrfs_get_alloc_profile(root, 0), CHUNK_ALLOC_NO_FORCE); btrfs_end_transaction(trans, root); - if (ret == -ENOSPC) + if (ret > 0 || ret == -ENOSPC) ret = 0; break; case COMMIT_TRANS: -- cgit v0.10.2 From d2c609b834d62f1e91f1635a27dca29f7806d3d6 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Mon, 15 Aug 2016 12:10:33 -0400 Subject: btrfs: properly track when rescan worker is running The qgroup_flags field is overloaded such that it reflects the on-disk status of qgroups and the runtime state. The BTRFS_QGROUP_STATUS_FLAG_RESCAN flag is used to indicate that a rescan operation is in progress, but if the file system is unmounted while a rescan is running, the rescan operation is paused. If the file system is then mounted read-only, the flag will still be present but the rescan operation will not have been resumed. When we go to umount, btrfs_qgroup_wait_for_completion will see the flag and interpret it to mean that the rescan worker is still running and will wait for a completion that will never come. This patch uses a separate flag to indicate when the worker is running. The locking and state surrounding the qgroup rescan worker needs a lot of attention beyond this patch but this is enough to avoid a hung umount. Cc: # v4.4+ Signed-off-by; Jeff Mahoney Reviewed-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f66a0ba..acc6850 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1028,6 +1028,7 @@ struct btrfs_fs_info { struct btrfs_workqueue *qgroup_rescan_workers; struct completion qgroup_rescan_completion; struct btrfs_work qgroup_rescan_work; + bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */ /* filesystem state */ unsigned long fs_state; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4a34c9f..f5b2a7f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2304,6 +2304,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) fs_info->quota_enabled = 0; fs_info->pending_quota_state = 0; fs_info->qgroup_ulist = NULL; + fs_info->qgroup_rescan_running = false; mutex_init(&fs_info->qgroup_rescan_lock); } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 93ee1c1..3fe295e 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2303,6 +2303,10 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) int err = -ENOMEM; int ret = 0; + mutex_lock(&fs_info->qgroup_rescan_lock); + fs_info->qgroup_rescan_running = true; + mutex_unlock(&fs_info->qgroup_rescan_lock); + path = btrfs_alloc_path(); if (!path) goto out; @@ -2369,6 +2373,9 @@ out: } done: + mutex_lock(&fs_info->qgroup_rescan_lock); + fs_info->qgroup_rescan_running = false; + mutex_unlock(&fs_info->qgroup_rescan_lock); complete_all(&fs_info->qgroup_rescan_completion); } @@ -2494,7 +2501,7 @@ int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) mutex_lock(&fs_info->qgroup_rescan_lock); spin_lock(&fs_info->qgroup_lock); - running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN; + running = fs_info->qgroup_rescan_running; spin_unlock(&fs_info->qgroup_lock); mutex_unlock(&fs_info->qgroup_rescan_lock); -- cgit v0.10.2 From d06f23d6a947c9abae41dc46be69a56baf36f436 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Mon, 8 Aug 2016 22:08:06 -0400 Subject: btrfs: waiting on qgroup rescan should not always be interruptible We wait on qgroup rescan completion in three places: file system shutdown, the quota disable ioctl, and the rescan wait ioctl. If the user sends a signal while we're waiting, we continue happily along. This is expected behavior for the rescan wait ioctl. It's racy in the shutdown path but mostly works due to other unrelated synchronization points. In the quota disable path, it Oopses the kernel pretty much immediately. Cc: # v4.4+ Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f5b2a7f..7857f64 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3864,7 +3864,7 @@ void close_ctree(struct btrfs_root *root) smp_mb(); /* wait for the qgroup rescan worker to stop */ - btrfs_qgroup_wait_for_completion(fs_info); + btrfs_qgroup_wait_for_completion(fs_info, false); /* wait for the uuid_scan task to finish */ down(&fs_info->uuid_tree_rescan_sem); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 14ed1e9..b2a2da5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -5084,7 +5084,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - return btrfs_qgroup_wait_for_completion(root->fs_info); + return btrfs_qgroup_wait_for_completion(root->fs_info, true); } static long _btrfs_ioctl_set_received_subvol(struct file *file, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 3fe295e..13eb6a7 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -995,7 +995,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, goto out; fs_info->quota_enabled = 0; fs_info->pending_quota_state = 0; - btrfs_qgroup_wait_for_completion(fs_info); + btrfs_qgroup_wait_for_completion(fs_info, false); spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; fs_info->quota_root = NULL; @@ -2494,7 +2494,8 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) return 0; } -int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) +int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, + bool interruptible) { int running; int ret = 0; @@ -2505,9 +2506,14 @@ int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) spin_unlock(&fs_info->qgroup_lock); mutex_unlock(&fs_info->qgroup_rescan_lock); - if (running) + if (!running) + return 0; + + if (interruptible) ret = wait_for_completion_interruptible( &fs_info->qgroup_rescan_completion); + else + wait_for_completion(&fs_info->qgroup_rescan_completion); return ret; } diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 710887c..af3e557 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -46,7 +46,8 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); -int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info); +int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, + bool interruptible); int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 src, u64 dst); int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, -- cgit v0.10.2 From cb93b52cc005ba0e470845b519c662e661d5113c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 15 Aug 2016 10:36:50 +0800 Subject: btrfs: qgroup: Refactor btrfs_qgroup_insert_dirty_extent() Refactor btrfs_qgroup_insert_dirty_extent() function, to two functions: 1. btrfs_qgroup_insert_dirty_extent_nolock() Almost the same with original code. For delayed_ref usage, which has delayed refs locked. Change the return value type to int, since caller never needs the pointer, but only needs to know if they need to free the allocated memory. 2. btrfs_qgroup_insert_dirty_extent() The more encapsulated version. Will do the delayed_refs lock, memory allocation, quota enabled check and other things. The original design is to keep exported functions to minimal, but since more btrfs hacks exposed, like replacing path in balance, we need to record dirty extents manually, so we have to add such functions. Also, add comment for both functions, to info developers how to keep qgroup correct when doing hacks. Cc: Mark Fasheh Signed-off-by: Qu Wenruo Reviewed-and-Tested-by: Goldwyn Rodrigues Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index d9ddcfc..ac02e04 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -541,7 +541,6 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *existing; struct btrfs_delayed_ref_head *head_ref = NULL; struct btrfs_delayed_ref_root *delayed_refs; - struct btrfs_qgroup_extent_record *qexisting; int count_mod = 1; int must_insert_reserved = 0; @@ -606,10 +605,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, qrecord->num_bytes = num_bytes; qrecord->old_roots = NULL; - qexisting = btrfs_qgroup_insert_dirty_extent(fs_info, - delayed_refs, - qrecord); - if (qexisting) + if(btrfs_qgroup_insert_dirty_extent_nolock(fs_info, + delayed_refs, qrecord)) kfree(qrecord); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d2cc16f..d108727 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8521,35 +8521,6 @@ reada: wc->reada_slot = slot; } -/* - * These may not be seen by the usual inc/dec ref code so we have to - * add them here. - */ -static int record_one_subtree_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, - u64 num_bytes) -{ - struct btrfs_qgroup_extent_record *qrecord; - struct btrfs_delayed_ref_root *delayed_refs; - - qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS); - if (!qrecord) - return -ENOMEM; - - qrecord->bytenr = bytenr; - qrecord->num_bytes = num_bytes; - qrecord->old_roots = NULL; - - delayed_refs = &trans->transaction->delayed_refs; - spin_lock(&delayed_refs->lock); - if (btrfs_qgroup_insert_dirty_extent(trans->fs_info, - delayed_refs, qrecord)) - kfree(qrecord); - spin_unlock(&delayed_refs->lock); - - return 0; -} - static int account_leaf_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb) @@ -8583,7 +8554,8 @@ static int account_leaf_items(struct btrfs_trans_handle *trans, num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); - ret = record_one_subtree_extent(trans, root, bytenr, num_bytes); + ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info, + bytenr, num_bytes, GFP_NOFS); if (ret) return ret; } @@ -8732,8 +8704,9 @@ walk_down: btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); path->locks[level] = BTRFS_READ_LOCK_BLOCKING; - ret = record_one_subtree_extent(trans, root, child_bytenr, - root->nodesize); + ret = btrfs_qgroup_insert_dirty_extent(trans, + root->fs_info, child_bytenr, + root->nodesize, GFP_NOFS); if (ret) goto out; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 13eb6a7..8db2e29 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1453,10 +1453,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, return ret; } -struct btrfs_qgroup_extent_record * -btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_root *delayed_refs, - struct btrfs_qgroup_extent_record *record) +int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_qgroup_extent_record *record) { struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; struct rb_node *parent_node = NULL; @@ -1475,12 +1474,42 @@ btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, else if (bytenr > entry->bytenr) p = &(*p)->rb_right; else - return entry; + return 1; } rb_link_node(&record->node, parent_node, p); rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); - return NULL; + return 0; +} + +int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, + gfp_t gfp_flag) +{ + struct btrfs_qgroup_extent_record *record; + struct btrfs_delayed_ref_root *delayed_refs; + int ret; + + if (!fs_info->quota_enabled || bytenr == 0 || num_bytes == 0) + return 0; + if (WARN_ON(trans == NULL)) + return -EINVAL; + record = kmalloc(sizeof(*record), gfp_flag); + if (!record) + return -ENOMEM; + + delayed_refs = &trans->transaction->delayed_refs; + record->bytenr = bytenr; + record->num_bytes = num_bytes; + record->old_roots = NULL; + + spin_lock(&delayed_refs->lock); + ret = btrfs_qgroup_insert_dirty_extent_nolock(fs_info, delayed_refs, + record); + spin_unlock(&delayed_refs->lock); + if (ret > 0) + kfree(record); + return 0; } #define UPDATE_NEW 0 diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index af3e557..1bc64c8 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -64,10 +64,35 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); struct btrfs_delayed_extent_op; int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); -struct btrfs_qgroup_extent_record * -btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_root *delayed_refs, - struct btrfs_qgroup_extent_record *record); +/* + * Insert one dirty extent record into @delayed_refs, informing qgroup to + * account that extent at commit trans time. + * + * No lock version, caller must acquire delayed ref lock and allocate memory. + * + * Return 0 for success insert + * Return >0 for existing record, caller can free @record safely. + * Error is not possible + */ +int btrfs_qgroup_insert_dirty_extent_nolock( + struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_qgroup_extent_record *record); + +/* + * Insert one dirty extent record into @delayed_refs, informing qgroup to + * account that extent at commit trans time. + * + * Better encapsulated version. + * + * Return 0 if the operation is done. + * Return <0 for error, like memory allocation failure or invalid parameter + * (NULL trans) + */ +int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, + gfp_t gfp_flag); + int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, -- cgit v0.10.2 From 62b99540a1d91e46422f0e04de50fc723812c421 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 15 Aug 2016 10:36:51 +0800 Subject: btrfs: relocation: Fix leaking qgroups numbers on data extents This patch fixes a REGRESSION introduced in 4.2, caused by the big quota rework. When balancing data extents, qgroup will leak all its numbers for relocated data extents. The relocation is done in the following steps for data extents: 1) Create data reloc tree and inode 2) Copy all data extents to data reloc tree And commit transaction 3) Create tree reloc tree(special snapshot) for any related subvolumes 4) Replace file extent in tree reloc tree with new extents in data reloc tree And commit transaction 5) Merge tree reloc tree with original fs, by swapping tree blocks For 1)~4), since tree reloc tree and data reloc tree doesn't count to qgroup, everything is OK. But for 5), the swapping of tree blocks will only info qgroup to track metadata extents. If metadata extents contain file extents, qgroup number for file extents will get lost, leading to corrupted qgroup accounting. The fix is, before commit transaction of step 5), manually info qgroup to track all file extents in data reloc tree. Since at commit transaction time, the tree swapping is done, and qgroup will account these data extents correctly. Cc: Mark Fasheh Reported-by: Mark Fasheh Reported-by: Filipe Manana Signed-off-by: Qu Wenruo Tested-by: Goldwyn Rodrigues Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b26a5ae..27480ef 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -31,6 +31,7 @@ #include "async-thread.h" #include "free-space-cache.h" #include "inode-map.h" +#include "qgroup.h" /* * backref_node, mapping_node and tree_block start with this @@ -3916,6 +3917,90 @@ int prepare_to_relocate(struct reloc_control *rc) return 0; } +/* + * Qgroup fixer for data chunk relocation. + * The data relocation is done in the following steps + * 1) Copy data extents into data reloc tree + * 2) Create tree reloc tree(special snapshot) for related subvolumes + * 3) Modify file extents in tree reloc tree + * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks + * + * The problem is, data and tree reloc tree are not accounted to qgroup, + * and 4) will only info qgroup to track tree blocks change, not file extents + * in the tree blocks. + * + * The good news is, related data extents are all in data reloc tree, so we + * only need to info qgroup to track all file extents in data reloc tree + * before commit trans. + */ +static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans, + struct reloc_control *rc) +{ + struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; + struct inode *inode = rc->data_inode; + struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root; + struct btrfs_path *path; + struct btrfs_key key; + int ret = 0; + + if (!fs_info->quota_enabled) + return 0; + + /* + * Only for stage where we update data pointers the qgroup fix is + * valid. + * For MOVING_DATA stage, we will miss the timing of swapping tree + * blocks, and won't fix it. + */ + if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found)) + return 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + key.objectid = btrfs_ino(inode); + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0); + if (ret < 0) + goto out; + + lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1); + while (1) { + struct btrfs_file_extent_item *fi; + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid > btrfs_ino(inode)) + break; + if (key.type != BTRFS_EXTENT_DATA_KEY) + goto next; + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(path->nodes[0], fi) != + BTRFS_FILE_EXTENT_REG) + goto next; + ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info, + btrfs_file_extent_disk_bytenr(path->nodes[0], fi), + btrfs_file_extent_disk_num_bytes(path->nodes[0], fi), + GFP_NOFS); + if (ret < 0) + break; +next: + ret = btrfs_next_item(data_reloc_root, path); + if (ret < 0) + break; + if (ret > 0) { + ret = 0; + break; + } + } + unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1); +out: + btrfs_free_path(path); + return ret; +} + static noinline_for_stack int relocate_block_group(struct reloc_control *rc) { struct rb_root blocks = RB_ROOT; @@ -4102,10 +4187,16 @@ restart: /* get rid of pinned extents */ trans = btrfs_join_transaction(rc->extent_root); - if (IS_ERR(trans)) + if (IS_ERR(trans)) { err = PTR_ERR(trans); - else - btrfs_commit_transaction(trans, rc->extent_root); + goto out_free; + } + err = qgroup_fix_relocated_data_extents(trans, rc); + if (err < 0) { + btrfs_abort_transaction(trans, err); + goto out_free; + } + btrfs_commit_transaction(trans, rc->extent_root); out_free: btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); btrfs_free_path(path); @@ -4468,10 +4559,16 @@ int btrfs_recover_relocation(struct btrfs_root *root) unset_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root); - if (IS_ERR(trans)) + if (IS_ERR(trans)) { err = PTR_ERR(trans); - else - err = btrfs_commit_transaction(trans, rc->extent_root); + goto out_free; + } + err = qgroup_fix_relocated_data_extents(trans, rc); + if (err < 0) { + btrfs_abort_transaction(trans, err); + goto out_free; + } + err = btrfs_commit_transaction(trans, rc->extent_root); out_free: kfree(rc); out: -- cgit v0.10.2 From df2c95f33e0a28b22509e4ee85365eedf32a1056 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 15 Aug 2016 10:36:52 +0800 Subject: btrfs: qgroup: Fix qgroup incorrectness caused by log replay When doing log replay at mount time(after power loss), qgroup will leak numbers of replayed data extents. The cause is almost the same of balance. So fix it by manually informing qgroup for owner changed extents. The bug can be detected by btrfs/119 test case. Cc: Mark Fasheh Signed-off-by: Qu Wenruo Reviewed-and-Tested-by: Goldwyn Rodrigues Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index fff3f3e..ffe92da 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -27,6 +27,7 @@ #include "backref.h" #include "hash.h" #include "compression.h" +#include "qgroup.h" /* magic values for the inode_only field in btrfs_log_inode: * @@ -680,6 +681,21 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, ins.type = BTRFS_EXTENT_ITEM_KEY; offset = key->offset - btrfs_file_extent_offset(eb, item); + /* + * Manually record dirty extent, as here we did a shallow + * file extent item copy and skip normal backref update, + * but modifying extent tree all by ourselves. + * So need to manually record dirty extent for qgroup, + * as the owner of the file extent changed from log tree + * (doesn't affect qgroup) to fs/file tree(affects qgroup) + */ + ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info, + btrfs_file_extent_disk_bytenr(eb, item), + btrfs_file_extent_disk_num_bytes(eb, item), + GFP_NOFS); + if (ret < 0) + goto out; + if (ins.objectid > 0) { u64 csum_start; u64 csum_end; -- cgit v0.10.2 From dcb40c196fc85c6dfb28456480e5a882e26f567d Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang Date: Mon, 25 Jul 2016 15:51:38 +0800 Subject: btrfs: use correct offset for reloc_inode in prealloc_file_extent_cluster() In prealloc_file_extent_cluster(), btrfs_check_data_free_space() uses wrong file offset for reloc_inode, it uses cluster->start and cluster->end, which indeed are extent's bytenr. The correct value should be cluster->[start|end] minus block group's start bytenr. start bytenr cluster->start | | extent | extent | ...| extent | |----------------------------------------------------------------| | block group reloc_inode | Signed-off-by: Wang Xiaoguang Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 27480ef..71b4b70 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3038,12 +3038,14 @@ int prealloc_file_extent_cluster(struct inode *inode, u64 num_bytes; int nr = 0; int ret = 0; + u64 prealloc_start = cluster->start - offset; + u64 prealloc_end = cluster->end - offset; BUG_ON(cluster->start != cluster->boundary[0]); inode_lock(inode); - ret = btrfs_check_data_free_space(inode, cluster->start, - cluster->end + 1 - cluster->start); + ret = btrfs_check_data_free_space(inode, prealloc_start, + prealloc_end + 1 - prealloc_start); if (ret) goto out; @@ -3064,8 +3066,8 @@ int prealloc_file_extent_cluster(struct inode *inode, break; nr++; } - btrfs_free_reserved_data_space(inode, cluster->start, - cluster->end + 1 - cluster->start); + btrfs_free_reserved_data_space(inode, prealloc_start, + prealloc_end + 1 - prealloc_start); out: inode_unlock(inode); return ret; -- cgit v0.10.2 From 4824f1f412f75e9f84b9cecbde828e8f4699f82d Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang Date: Mon, 25 Jul 2016 15:51:39 +0800 Subject: btrfs: divide btrfs_update_reserved_bytes() into two functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch divides btrfs_update_reserved_bytes() into btrfs_add_reserved_bytes() and btrfs_free_reserved_bytes(), and next patch will extend btrfs_add_reserved_bytes()to fix some false ENOSPC error, please see later patch for detailed info. Signed-off-by: Wang Xiaoguang Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d108727..f1121db 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -104,9 +104,10 @@ static int find_next_key(struct btrfs_path *path, int level, struct btrfs_key *key); static void dump_space_info(struct btrfs_space_info *info, u64 bytes, int dump_block_groups); -static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, - int delalloc); +static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve, int delalloc); +static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int delalloc); static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes); int btrfs_pin_extent(struct btrfs_root *root, @@ -6497,19 +6498,14 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) } /** - * btrfs_update_reserved_bytes - update the block_group and space info counters + * btrfs_add_reserved_bytes - update the block_group and space info counters * @cache: The cache we are manipulating * @num_bytes: The number of bytes in question * @reserve: One of the reservation enums * @delalloc: The blocks are allocated for the delalloc write * - * This is called by the allocator when it reserves space, or by somebody who is - * freeing space that was never actually used on disk. For example if you - * reserve some space for a new leaf in transaction A and before transaction A - * commits you free that leaf, you call this with reserve set to 0 in order to - * clear the reservation. - * - * Metadata reservations should be called with RESERVE_ALLOC so we do the proper + * This is called by the allocator when it reserves space. Metadata + * reservations should be called with RESERVE_ALLOC so we do the proper * ENOSPC accounting. For data we handle the reservation through clearing the * delalloc bits in the io_tree. We have to do this since we could end up * allocating less disk space for the amount of data we have reserved in the @@ -6519,44 +6515,65 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) * make the reservation and return -EAGAIN, otherwise this function always * succeeds. */ -static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int delalloc) +static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve, int delalloc) { struct btrfs_space_info *space_info = cache->space_info; int ret = 0; spin_lock(&space_info->lock); spin_lock(&cache->lock); - if (reserve != RESERVE_FREE) { - if (cache->ro) { - ret = -EAGAIN; - } else { - cache->reserved += num_bytes; - space_info->bytes_reserved += num_bytes; - if (reserve == RESERVE_ALLOC) { - trace_btrfs_space_reservation(cache->fs_info, - "space_info", space_info->flags, - num_bytes, 0); - space_info->bytes_may_use -= num_bytes; - } - - if (delalloc) - cache->delalloc_bytes += num_bytes; - } + if (cache->ro) { + ret = -EAGAIN; } else { - if (cache->ro) - space_info->bytes_readonly += num_bytes; - cache->reserved -= num_bytes; - space_info->bytes_reserved -= num_bytes; + cache->reserved += num_bytes; + space_info->bytes_reserved += num_bytes; + if (reserve == RESERVE_ALLOC) { + trace_btrfs_space_reservation(cache->fs_info, + "space_info", space_info->flags, + num_bytes, 0); + space_info->bytes_may_use -= num_bytes; + } if (delalloc) - cache->delalloc_bytes -= num_bytes; + cache->delalloc_bytes += num_bytes; } spin_unlock(&cache->lock); spin_unlock(&space_info->lock); return ret; } +/** + * btrfs_free_reserved_bytes - update the block_group and space info counters + * @cache: The cache we are manipulating + * @num_bytes: The number of bytes in question + * @delalloc: The blocks are allocated for the delalloc write + * + * This is called by somebody who is freeing space that was never actually used + * on disk. For example if you reserve some space for a new leaf in transaction + * A and before transaction A commits you free that leaf, you call this with + * reserve set to 0 in order to clear the reservation. + */ + +static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int delalloc) +{ + struct btrfs_space_info *space_info = cache->space_info; + int ret = 0; + + spin_lock(&space_info->lock); + spin_lock(&cache->lock); + if (cache->ro) + space_info->bytes_readonly += num_bytes; + cache->reserved -= num_bytes; + space_info->bytes_reserved -= num_bytes; + + if (delalloc) + cache->delalloc_bytes -= num_bytes; + spin_unlock(&cache->lock); + spin_unlock(&space_info->lock); + return ret; +} void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -7191,7 +7208,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); btrfs_add_free_space(cache, buf->start, buf->len); - btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0); + btrfs_free_reserved_bytes(cache, buf->len, 0); btrfs_put_block_group(cache); trace_btrfs_reserved_extent_free(root, buf->start, buf->len); pin = 0; @@ -7763,8 +7780,8 @@ checks: search_start - offset); BUG_ON(offset > search_start); - ret = btrfs_update_reserved_bytes(block_group, num_bytes, - alloc_type, delalloc); + ret = btrfs_add_reserved_bytes(block_group, num_bytes, + alloc_type, delalloc); if (ret == -EAGAIN) { btrfs_add_free_space(block_group, offset, num_bytes); goto loop; @@ -7995,7 +8012,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, if (btrfs_test_opt(root->fs_info, DISCARD)) ret = btrfs_discard_extent(root, start, len, NULL); btrfs_add_free_space(cache, start, len); - btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); + btrfs_free_reserved_bytes(cache, len, delalloc); trace_btrfs_reserved_extent_free(root, start, len); } @@ -8223,8 +8240,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, if (!block_group) return -EINVAL; - ret = btrfs_update_reserved_bytes(block_group, ins->offset, - RESERVE_ALLOC_NO_ACCOUNT, 0); + ret = btrfs_add_reserved_bytes(block_group, ins->offset, + RESERVE_ALLOC_NO_ACCOUNT, 0); BUG_ON(ret); /* logic error */ ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 0, owner, offset, ins, 1); -- cgit v0.10.2 From 18513091af9483ba84328d42092bd4d42a3c958f Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang Date: Mon, 25 Jul 2016 15:51:40 +0800 Subject: btrfs: update btrfs_space_info's bytes_may_use timely This patch can fix some false ENOSPC errors, below test script can reproduce one false ENOSPC error: #!/bin/bash dd if=/dev/zero of=fs.img bs=$((1024*1024)) count=128 dev=$(losetup --show -f fs.img) mkfs.btrfs -f -M $dev mkdir /tmp/mntpoint mount $dev /tmp/mntpoint cd /tmp/mntpoint xfs_io -f -c "falloc 0 $((64*1024*1024))" testfile Above script will fail for ENOSPC reason, but indeed fs still has free space to satisfy this request. Please see call graph: btrfs_fallocate() |-> btrfs_alloc_data_chunk_ondemand() | bytes_may_use += 64M |-> btrfs_prealloc_file_range() |-> btrfs_reserve_extent() |-> btrfs_add_reserved_bytes() | alloc_type is RESERVE_ALLOC_NO_ACCOUNT, so it does not | change bytes_may_use, and bytes_reserved += 64M. Now | bytes_may_use + bytes_reserved == 128M, which is greater | than btrfs_space_info's total_bytes, false enospc occurs. | Note, the bytes_may_use decrease operation will be done in | end of btrfs_fallocate(), which is too late. Here is another simple case for buffered write: CPU 1 | CPU 2 | |-> cow_file_range() |-> __btrfs_buffered_write() |-> btrfs_reserve_extent() | | | | | | | | | ..... | |-> btrfs_check_data_free_space() | | | | |-> extent_clear_unlock_delalloc() | In CPU 1, btrfs_reserve_extent()->find_free_extent()-> btrfs_add_reserved_bytes() do not decrease bytes_may_use, the decrease operation will be delayed to be done in extent_clear_unlock_delalloc(). Assume in this case, btrfs_reserve_extent() reserved 128MB data, CPU2's btrfs_check_data_free_space() tries to reserve 100MB data space. If 100MB > data_sinfo->total_bytes - data_sinfo->bytes_used - data_sinfo->bytes_reserved - data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - data_sinfo->bytes_may_use btrfs_check_data_free_space() will try to allcate new data chunk or call btrfs_start_delalloc_roots(), or commit current transaction in order to reserve some free space, obviously a lot of work. But indeed it's not necessary as long as decreasing bytes_may_use timely, we still have free space, decreasing 128M from bytes_may_use. To fix this issue, this patch chooses to update bytes_may_use for both data and metadata in btrfs_add_reserved_bytes(). For compress path, real extent length may not be equal to file content length, so introduce a ram_bytes argument for btrfs_reserve_extent(), find_free_extent() and btrfs_add_reserved_bytes(), it's becasue bytes_may_use is increased by file content length. Then compress path can update bytes_may_use correctly. Also now we can discard RESERVE_ALLOC_NO_ACCOUNT, RESERVE_ALLOC and RESERVE_FREE. As we know, usually EXTENT_DO_ACCOUNTING is used for error path. In run_delalloc_nocow(), for inode marked as NODATACOW or extent marked as PREALLOC, we also need to update bytes_may_use, but can not pass EXTENT_DO_ACCOUNTING, because it also clears metadata reservation, so here we introduce EXTENT_CLEAR_DATA_RESV flag to indicate btrfs_clear_bit_hook() to update btrfs_space_info's bytes_may_use. Meanwhile __btrfs_prealloc_file_range() will call btrfs_free_reserved_data_space() internally for both sucessful and failed path, btrfs_prealloc_file_range()'s callers does not need to call btrfs_free_reserved_data_space() any more. Signed-off-by: Wang Xiaoguang Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index acc6850..09cdff0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2579,7 +2579,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 root_objectid, u64 owner, u64 offset, struct btrfs_key *ins); -int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, +int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, int is_data, int delalloc); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f1121db..133c93b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -60,21 +60,6 @@ enum { CHUNK_ALLOC_FORCE = 2, }; -/* - * Control how reservations are dealt with. - * - * RESERVE_FREE - freeing a reservation. - * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for - * ENOSPC accounting - * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update - * bytes_may_use as the ENOSPC accounting is done elsewhere - */ -enum { - RESERVE_FREE = 0, - RESERVE_ALLOC = 1, - RESERVE_ALLOC_NO_ACCOUNT = 2, -}; - static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc); @@ -105,7 +90,7 @@ static int find_next_key(struct btrfs_path *path, int level, static void dump_space_info(struct btrfs_space_info *info, u64 bytes, int dump_block_groups); static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int delalloc); + u64 ram_bytes, u64 num_bytes, int delalloc); static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, u64 num_bytes, int delalloc); static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, @@ -3502,7 +3487,6 @@ again: dcs = BTRFS_DC_SETUP; else if (ret == -ENOSPC) set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags); - btrfs_free_reserved_data_space(inode, 0, num_pages); out_put: iput(inode); @@ -6500,8 +6484,9 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) /** * btrfs_add_reserved_bytes - update the block_group and space info counters * @cache: The cache we are manipulating + * @ram_bytes: The number of bytes of file content, and will be same to + * @num_bytes except for the compress path. * @num_bytes: The number of bytes in question - * @reserve: One of the reservation enums * @delalloc: The blocks are allocated for the delalloc write * * This is called by the allocator when it reserves space. Metadata @@ -6516,7 +6501,7 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) * succeeds. */ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int delalloc) + u64 ram_bytes, u64 num_bytes, int delalloc) { struct btrfs_space_info *space_info = cache->space_info; int ret = 0; @@ -6528,13 +6513,11 @@ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, } else { cache->reserved += num_bytes; space_info->bytes_reserved += num_bytes; - if (reserve == RESERVE_ALLOC) { - trace_btrfs_space_reservation(cache->fs_info, - "space_info", space_info->flags, - num_bytes, 0); - space_info->bytes_may_use -= num_bytes; - } + trace_btrfs_space_reservation(cache->fs_info, + "space_info", space_info->flags, + ram_bytes, 0); + space_info->bytes_may_use -= ram_bytes; if (delalloc) cache->delalloc_bytes += num_bytes; } @@ -7433,9 +7416,9 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache, * the free space extent currently. */ static noinline int find_free_extent(struct btrfs_root *orig_root, - u64 num_bytes, u64 empty_size, - u64 hint_byte, struct btrfs_key *ins, - u64 flags, int delalloc) + u64 ram_bytes, u64 num_bytes, u64 empty_size, + u64 hint_byte, struct btrfs_key *ins, + u64 flags, int delalloc) { int ret = 0; struct btrfs_root *root = orig_root->fs_info->extent_root; @@ -7447,8 +7430,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root, struct btrfs_space_info *space_info; int loop = 0; int index = __get_raid_index(flags); - int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ? - RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; bool failed_cluster_refill = false; bool failed_alloc = false; bool use_cluster = true; @@ -7780,8 +7761,8 @@ checks: search_start - offset); BUG_ON(offset > search_start); - ret = btrfs_add_reserved_bytes(block_group, num_bytes, - alloc_type, delalloc); + ret = btrfs_add_reserved_bytes(block_group, ram_bytes, + num_bytes, delalloc); if (ret == -EAGAIN) { btrfs_add_free_space(block_group, offset, num_bytes); goto loop; @@ -7953,7 +7934,7 @@ again: up_read(&info->groups_sem); } -int btrfs_reserve_extent(struct btrfs_root *root, +int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, int is_data, int delalloc) @@ -7965,8 +7946,8 @@ int btrfs_reserve_extent(struct btrfs_root *root, flags = btrfs_get_alloc_profile(root, is_data); again: WARN_ON(num_bytes < root->sectorsize); - ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, - flags, delalloc); + ret = find_free_extent(root, ram_bytes, num_bytes, empty_size, + hint_byte, ins, flags, delalloc); if (!ret && !is_data) { btrfs_dec_block_group_reservations(root->fs_info, ins->objectid); @@ -7975,6 +7956,7 @@ again: num_bytes = min(num_bytes >> 1, ins->offset); num_bytes = round_down(num_bytes, root->sectorsize); num_bytes = max(num_bytes, min_alloc_size); + ram_bytes = num_bytes; if (num_bytes == min_alloc_size) final_tried = true; goto again; @@ -8241,7 +8223,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, return -EINVAL; ret = btrfs_add_reserved_bytes(block_group, ins->offset, - RESERVE_ALLOC_NO_ACCOUNT, 0); + ins->offset, 0); BUG_ON(ret); /* logic error */ ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 0, owner, offset, ins, 1); @@ -8385,7 +8367,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, if (IS_ERR(block_rsv)) return ERR_CAST(block_rsv); - ret = btrfs_reserve_extent(root, blocksize, blocksize, + ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize, empty_size, hint, &ins, 0, 0); if (ret) goto out_unuse; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c0c1c4f..b52ca5d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -20,6 +20,7 @@ #define EXTENT_DAMAGED (1U << 14) #define EXTENT_NORESERVE (1U << 15) #define EXTENT_QGROUP_RESERVED (1U << 16) +#define EXTENT_CLEAR_DATA_RESV (1U << 17) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5842423..3391f2ad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2675,6 +2675,7 @@ static long btrfs_fallocate(struct file *file, int mode, alloc_start = round_down(offset, blocksize); alloc_end = round_up(offset + len, blocksize); + cur_offset = alloc_start; /* Make sure we aren't being give some crap mode */ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) @@ -2767,7 +2768,6 @@ static long btrfs_fallocate(struct file *file, int mode, /* First, check if we exceed the qgroup limit */ INIT_LIST_HEAD(&reserve_list); - cur_offset = alloc_start; while (1) { em = btrfs_get_extent(inode, NULL, 0, cur_offset, alloc_end - cur_offset, 0); @@ -2794,6 +2794,14 @@ static long btrfs_fallocate(struct file *file, int mode, last_byte - cur_offset); if (ret < 0) break; + } else { + /* + * Do not need to reserve unwritten extent for this + * range, free reserved data space first, otherwise + * it'll result in false ENOSPC error. + */ + btrfs_free_reserved_data_space(inode, cur_offset, + last_byte - cur_offset); } free_extent_map(em); cur_offset = last_byte; @@ -2811,6 +2819,9 @@ static long btrfs_fallocate(struct file *file, int mode, range->start, range->len, 1 << inode->i_blkbits, offset + len, &alloc_hint); + else + btrfs_free_reserved_data_space(inode, range->start, + range->len); list_del(&range->list); kfree(range); } @@ -2845,18 +2856,11 @@ out_unlock: unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, &cached_state, GFP_KERNEL); out: - /* - * As we waited the extent range, the data_rsv_map must be empty - * in the range, as written data range will be released from it. - * And for prealloacted extent, it will also be released when - * its metadata is written. - * So this is completely used as cleanup. - */ - btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start); inode_unlock(inode); /* Let go of our reservation. */ - btrfs_free_reserved_data_space(inode, alloc_start, - alloc_end - alloc_start); + if (ret != 0) + btrfs_free_reserved_data_space(inode, alloc_start, + alloc_end - cur_offset); return ret; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index aa6faba..359ee86 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -495,10 +495,9 @@ again: ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, prealloc, prealloc, &alloc_hint); if (ret) { - btrfs_delalloc_release_space(inode, 0, prealloc); + btrfs_delalloc_release_metadata(inode, prealloc); goto out_put; } - btrfs_free_reserved_data_space(inode, 0, prealloc); ret = btrfs_write_out_ino_cache(root, trans, path, inode); out_put: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2963662..bcea200 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -566,6 +566,8 @@ cont: PAGE_SET_WRITEBACK | page_error_op | PAGE_END_WRITEBACK); + btrfs_free_reserved_data_space_noquota(inode, start, + end - start + 1); goto free_pages_out; } } @@ -742,7 +744,7 @@ retry: lock_extent(io_tree, async_extent->start, async_extent->start + async_extent->ram_size - 1); - ret = btrfs_reserve_extent(root, + ret = btrfs_reserve_extent(root, async_extent->ram_size, async_extent->compressed_size, async_extent->compressed_size, 0, alloc_hint, &ins, 1, 1); @@ -969,7 +971,8 @@ static noinline int cow_file_range(struct inode *inode, EXTENT_DEFRAG, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); - + btrfs_free_reserved_data_space_noquota(inode, start, + end - start + 1); *nr_written = *nr_written + (end - start + PAGE_SIZE) / PAGE_SIZE; *page_started = 1; @@ -989,7 +992,7 @@ static noinline int cow_file_range(struct inode *inode, unsigned long op; cur_alloc_size = disk_num_bytes; - ret = btrfs_reserve_extent(root, cur_alloc_size, + ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, root->sectorsize, 0, alloc_hint, &ins, 1, 1); if (ret < 0) @@ -1489,8 +1492,10 @@ out_check: extent_clear_unlock_delalloc(inode, cur_offset, cur_offset + num_bytes - 1, locked_page, EXTENT_LOCKED | - EXTENT_DELALLOC, PAGE_UNLOCK | - PAGE_SET_PRIVATE2); + EXTENT_DELALLOC | + EXTENT_CLEAR_DATA_RESV, + PAGE_UNLOCK | PAGE_SET_PRIVATE2); + if (!nolock && nocow) btrfs_end_write_no_snapshoting(root); cur_offset = extent_end; @@ -1807,7 +1812,9 @@ static void btrfs_clear_bit_hook(struct inode *inode, return; if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID - && do_list && !(state->state & EXTENT_NORESERVE)) + && do_list && !(state->state & EXTENT_NORESERVE) + && (*bits & (EXTENT_DO_ACCOUNTING | + EXTENT_CLEAR_DATA_RESV))) btrfs_free_reserved_data_space_noquota(inode, state->start, len); @@ -7252,7 +7259,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, int ret; alloc_hint = get_extent_allocation_hint(inode, start, len); - ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, + ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0, alloc_hint, &ins, 1, 1); if (ret) return ERR_PTR(ret); @@ -7752,6 +7759,13 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, ret = PTR_ERR(em2); goto unlock_err; } + /* + * For inode marked NODATACOW or extent marked PREALLOC, + * use the existing or preallocated extent, so does not + * need to adjust btrfs_space_info's bytes_may_use. + */ + btrfs_free_reserved_data_space_noquota(inode, + start, len); goto unlock; } } @@ -7786,7 +7800,6 @@ unlock: i_size_write(inode, start + len); adjust_dio_outstanding_extents(inode, dio_data, len); - btrfs_free_reserved_data_space(inode, start, len); WARN_ON(dio_data->reserve < len); dio_data->reserve -= len; dio_data->unsubmitted_oe_range_end = start + len; @@ -10306,6 +10319,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, u64 last_alloc = (u64)-1; int ret = 0; bool own_trans = true; + u64 end = start + num_bytes - 1; if (trans) own_trans = false; @@ -10327,8 +10341,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, * sized chunks. */ cur_bytes = min(cur_bytes, last_alloc); - ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, - *alloc_hint, &ins, 1, 0); + ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes, + min_size, 0, *alloc_hint, &ins, 1, 0); if (ret) { if (own_trans) btrfs_end_transaction(trans, root); @@ -10414,6 +10428,9 @@ next: if (own_trans) btrfs_end_transaction(trans, root); } + if (cur_offset < end) + btrfs_free_reserved_data_space(inode, cur_offset, + end - cur_offset + 1); return ret; } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 71b4b70..8a2c2a0 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3040,6 +3040,7 @@ int prealloc_file_extent_cluster(struct inode *inode, int ret = 0; u64 prealloc_start = cluster->start - offset; u64 prealloc_end = cluster->end - offset; + u64 cur_offset; BUG_ON(cluster->start != cluster->boundary[0]); inode_lock(inode); @@ -3049,6 +3050,7 @@ int prealloc_file_extent_cluster(struct inode *inode, if (ret) goto out; + cur_offset = prealloc_start; while (nr < cluster->nr) { start = cluster->boundary[nr] - offset; if (nr + 1 < cluster->nr) @@ -3058,16 +3060,21 @@ int prealloc_file_extent_cluster(struct inode *inode, lock_extent(&BTRFS_I(inode)->io_tree, start, end); num_bytes = end + 1 - start; + if (cur_offset < start) + btrfs_free_reserved_data_space(inode, cur_offset, + start - cur_offset); ret = btrfs_prealloc_file_range(inode, 0, start, num_bytes, num_bytes, end + 1, &alloc_hint); + cur_offset = end + 1; unlock_extent(&BTRFS_I(inode)->io_tree, start, end); if (ret) break; nr++; } - btrfs_free_reserved_data_space(inode, prealloc_start, - prealloc_end + 1 - prealloc_start); + if (cur_offset < prealloc_end) + btrfs_free_reserved_data_space(inode, cur_offset, + prealloc_end + 1 - cur_offset); out: inode_unlock(inode); return ret; -- cgit v0.10.2 From 9e7cc91a6d18a4973c6d2cc104871439c9e94f3d Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang Date: Mon, 1 Aug 2016 13:28:08 +0800 Subject: btrfs: fix fsfreeze hang caused by delayed iputs deal When running fstests generic/068, sometimes we got below deadlock: xfs_io D ffff8800331dbb20 0 6697 6693 0x00000080 ffff8800331dbb20 ffff88007acfc140 ffff880034d895c0 ffff8800331dc000 ffff880032d243e8 fffffffeffffffff ffff880032d24400 0000000000000001 ffff8800331dbb38 ffffffff816a9045 ffff880034d895c0 ffff8800331dbba8 Call Trace: [] schedule+0x35/0x80 [] rwsem_down_read_failed+0xf2/0x140 [] ? __filemap_fdatawrite_range+0xd1/0x100 [] call_rwsem_down_read_failed+0x18/0x30 [] ? btrfs_alloc_block_rsv+0x2c/0xb0 [btrfs] [] percpu_down_read+0x35/0x50 [] __sb_start_write+0x2c/0x40 [] start_transaction+0x2a5/0x4d0 [btrfs] [] btrfs_join_transaction+0x17/0x20 [btrfs] [] btrfs_evict_inode+0x3c4/0x5d0 [btrfs] [] evict+0xba/0x1a0 [] iput+0x196/0x200 [] btrfs_run_delayed_iputs+0x70/0xc0 [btrfs] [] btrfs_commit_transaction+0x928/0xa80 [btrfs] [] btrfs_freeze+0x30/0x40 [btrfs] [] freeze_super+0xf0/0x190 [] do_vfs_ioctl+0x4a5/0x5c0 [] ? do_audit_syscall_entry+0x66/0x70 [] ? syscall_trace_enter_phase1+0x11f/0x140 [] SyS_ioctl+0x79/0x90 [] do_syscall_64+0x62/0x110 [] entry_SYSCALL64_slow_path+0x25/0x25 >From this warning, freeze_super() already holds SB_FREEZE_FS, but btrfs_freeze() will call btrfs_commit_transaction() again, if btrfs_commit_transaction() finds that it has delayed iputs to handle, it'll start_transaction(), which will try to get SB_FREEZE_FS lock again, then deadlock occurs. The root cause is that in btrfs, sync_filesystem(sb) does not make sure all metadata is updated. There still maybe some codes adding delayed iputs, see below sample race window: CPU1 | CPU2 |-> freeze_super() | |-> sync_filesystem(sb); | | |-> cleaner_kthread() | | |-> btrfs_delete_unused_bgs() | | |-> btrfs_remove_chunk() | | |-> btrfs_remove_block_group() | | |-> btrfs_add_delayed_iput() | | |-> sb->s_writers.frozen = SB_FREEZE_FS; | |-> sb_wait_write(sb, SB_FREEZE_FS); | | acquire SB_FREEZE_FS lock. | | | |-> btrfs_freeze() | |-> btrfs_commit_transaction() | |-> btrfs_run_delayed_iputs() | | will handle delayed iputs, | | that means start_transaction() | | will be called, which will try | | to get SB_FREEZE_FS lock. | To fix this issue, introduce a "int fs_frozen" to record internally whether fs has been frozen. If fs has been frozen, we can not handle delayed iputs. Signed-off-by: Wang Xiaoguang Reviewed-by: David Sterba [ add comment to btrfs_freeze ] Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 09cdff0..ec4154f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1080,6 +1080,8 @@ struct btrfs_fs_info { struct list_head pinned_chunks; int creating_free_space_tree; + /* Used to record internally whether fs has been frozen */ + int fs_frozen; }; struct btrfs_subvolume_writers { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7857f64..1706222 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2631,6 +2631,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->qgroup_op_seq, 0); atomic_set(&fs_info->reada_works_cnt, 0); atomic64_set(&fs_info->tree_mod_seq, 0); + fs_info->fs_frozen = 0; fs_info->sb = sb; fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; fs_info->metadata_ratio = 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 864ce33..4071fe2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2241,6 +2241,13 @@ static int btrfs_freeze(struct super_block *sb) struct btrfs_trans_handle *trans; struct btrfs_root *root = btrfs_sb(sb)->tree_root; + root->fs_info->fs_frozen = 1; + /* + * We don't need a barrier here, we'll wait for any transaction that + * could be in progress on other threads (and do delayed iputs that + * we want to avoid on a frozen filesystem), or do the commit + * ourselves. + */ trans = btrfs_attach_transaction_barrier(root); if (IS_ERR(trans)) { /* no transaction, don't bother */ @@ -2251,6 +2258,14 @@ static int btrfs_freeze(struct super_block *sb) return btrfs_commit_transaction(trans, root); } +static int btrfs_unfreeze(struct super_block *sb) +{ + struct btrfs_root *root = btrfs_sb(sb)->tree_root; + + root->fs_info->fs_frozen = 0; + return 0; +} + static int btrfs_show_devname(struct seq_file *m, struct dentry *root) { struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); @@ -2299,6 +2314,7 @@ static const struct super_operations btrfs_super_ops = { .statfs = btrfs_statfs, .remount_fs = btrfs_remount, .freeze_fs = btrfs_freeze, + .unfreeze_fs = btrfs_unfreeze, }; static const struct file_operations btrfs_ctl_fops = { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9cca0a7..95d4191 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2278,8 +2278,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); + /* + * If fs has been frozen, we can not handle delayed iputs, otherwise + * it'll result in deadlock about SB_FREEZE_FS. + */ if (current != root->fs_info->transaction_kthread && - current != root->fs_info->cleaner_kthread) + current != root->fs_info->cleaner_kthread && + !root->fs_info->fs_frozen) btrfs_run_delayed_iputs(root); return ret; -- cgit v0.10.2 From 28b737f6ede3661fe610937706c4a6f50e9ab769 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 29 Jul 2016 11:09:50 -0700 Subject: Btrfs: clarify do_chunk_alloc()'s return value Function start_transaction() can return ERR_PTR(1) when flush is BTRFS_RESERVE_FLUSH_LIMIT, so the call graph is start_transaction (return ERR_PTR(1)) -> btrfs_block_rsv_add (return 1) -> reserve_metadata_bytes (return 1) -> flush_space (return 1) -> do_chunk_alloc (return 1) With BTRFS_RESERVE_FLUSH_LIMIT, if flush_space is already on the flush_state of ALLOC_CHUNK and it successfully allocates a new chunk, then instead of trying to reserve space again, reserve_metadata_bytes returns 1 immediately. Eventually the callers who call start_transaction() usually just do the IS_ERR() check which ERR_PTR(1) can pass, then it'll get a panic when dereferencing a pointer which is ERR_PTR(1). The following patch fixes the above problem. "btrfs: flush_space: treat return value of do_chunk_alloc properly" https://patchwork.kernel.org/patch/7778651/ This add comments to clarify do_chunk_alloc()'s return value. Signed-off-by: Liu Bo Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 133c93b..d5d3cfb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4457,6 +4457,15 @@ void check_system_chunk(struct btrfs_trans_handle *trans, } } +/* + * If force is CHUNK_ALLOC_FORCE: + * - return 1 if it successfully allocates a chunk, + * - return errors including -ENOSPC otherwise. + * If force is NOT CHUNK_ALLOC_FORCE: + * - return 0 if it doesn't need to allocate a new chunk, + * - return 1 if it successfully allocates a chunk, + * - return errors including -ENOSPC otherwise. + */ static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 flags, int force) { -- cgit v0.10.2 From 142388194191a3edc9ba01cfcfd8b691e0971fb2 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 22 Jul 2016 06:04:53 +0800 Subject: btrfs: do not background blkdev_put() At the end of unmount/dev-delete, if the device exclusive open is not actually closed, then there might be a race with another program in the userland who is trying to open the device in exclusive mode and it may fail for eg: unmount /btrfs; fsck /dev/x btrfs dev del /dev/x /btrfs; fsck /dev/x so here background blkdev_put() is not a choice Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e217035..9a6f0dc 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -834,10 +834,6 @@ static void __free_device(struct work_struct *work) struct btrfs_device *device; device = container_of(work, struct btrfs_device, rcu_work); - - if (device->bdev) - blkdev_put(device->bdev, device->mode); - rcu_string_free(device->name); kfree(device); } @@ -852,6 +848,17 @@ static void free_device(struct rcu_head *head) schedule_work(&device->rcu_work); } +static void btrfs_close_bdev(struct btrfs_device *device) +{ + if (device->bdev && device->writeable) { + sync_blockdev(device->bdev); + invalidate_bdev(device->bdev); + } + + if (device->bdev) + blkdev_put(device->bdev, device->mode); +} + static void btrfs_close_one_device(struct btrfs_device *device) { struct btrfs_fs_devices *fs_devices = device->fs_devices; @@ -870,10 +877,7 @@ static void btrfs_close_one_device(struct btrfs_device *device) if (device->missing) fs_devices->missing_devices--; - if (device->bdev && device->writeable) { - sync_blockdev(device->bdev); - invalidate_bdev(device->bdev); - } + btrfs_close_bdev(device); new_device = btrfs_alloc_device(NULL, &device->devid, device->uuid); @@ -1932,6 +1936,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid) btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device); } + btrfs_close_bdev(device); + call_rcu(&device->rcu, free_device); num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; @@ -2025,6 +2031,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, /* zero out the old super if it is writable */ btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); } + + btrfs_close_bdev(srcdev); + call_rcu(&srcdev->rcu, free_device); /* @@ -2080,6 +2089,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, * the device_list_mutex lock. */ btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); + + btrfs_close_bdev(tgtdev); call_rcu(&tgtdev->rcu, free_device); } -- cgit v0.10.2 From 187ee58c62c1d0d238d3dc4835869d33e1869906 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 18 Aug 2016 15:30:06 -0400 Subject: Btrfs: fix em leak in find_first_block_group We need to call free_extent_map() on the em we look up. Signed-off-by: Josef Bacik Reviewed-by: Omar Sandoval Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d5d3cfb..60d4ae7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9887,6 +9887,7 @@ static int find_first_block_group(struct btrfs_root *root, } else { ret = 0; } + free_extent_map(em); goto out; } path->slots[0]++; -- cgit v0.10.2 From 35bbb97fc898aeb874cb7c8b746f091caa359994 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 17 Aug 2016 21:58:33 -0400 Subject: btrfs: don't create or leak aliased root while cleaning up orphans commit 909c3a22da3 (Btrfs: fix loading of orphan roots leading to BUG_ON) avoids the BUG_ON but can add an aliased root to the dead_roots list or leak the root. Since we've already been loading roots into the radix tree, we should use it before looking the root up on disk. Cc: # 4.5 Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1706222..edda471 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1624,8 +1624,8 @@ fail: return ret; } -static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, - u64 root_id) +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id) { struct btrfs_root *root; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c9d42c9..e0efe45 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -68,6 +68,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, struct btrfs_key *location); int btrfs_init_fs_root(struct btrfs_root *root); +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id); int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 7fd7e18..0912960 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -272,6 +272,23 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) root_key.objectid = key.offset; key.offset++; + /* + * The root might have been inserted already, as before we look + * for orphan roots, log replay might have happened, which + * triggers a transaction commit and qgroup accounting, which + * in turn reads and inserts fs roots while doing backref + * walking. + */ + root = btrfs_lookup_fs_root(tree_root->fs_info, + root_key.objectid); + if (root) { + WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, + &root->state)); + if (btrfs_root_refs(&root->root_item) == 0) + btrfs_add_dead_root(root); + continue; + } + root = btrfs_read_fs_root(tree_root, &root_key); err = PTR_ERR_OR_ZERO(root); if (err && err != -ENOENT) { @@ -310,16 +327,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); err = btrfs_insert_fs_root(root->fs_info, root); - /* - * The root might have been inserted already, as before we look - * for orphan roots, log replay might have happened, which - * triggers a transaction commit and qgroup accounting, which - * in turn reads and inserts fs roots while doing backref - * walking. - */ - if (err == -EEXIST) - err = 0; if (err) { + BUG_ON(err == -EEXIST); btrfs_free_fs_root(root); break; } -- cgit v0.10.2 From 053ab70f0604224c7893b43f9d9d5efa283580d6 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 23 Aug 2016 17:37:45 -0700 Subject: Btrfs: check btree node's nritems When btree node (level = 1) has nritems which equals to zero, we can end up with panic due to insert_ptr()'s BUG_ON(slot > nritems); where slot is 1 and nritems is 0, as copy_for_split() calls insert_ptr(.., path->slots[1] + 1, ...); A invalid value results in the whole mess, this adds the check for btree's node nritems so that we stop reading block when when something is wrong. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index edda471..474209f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -613,6 +613,19 @@ static noinline int check_leaf(struct btrfs_root *root, return 0; } +static int check_node(struct btrfs_root *root, struct extent_buffer *node) +{ + unsigned long nr = btrfs_header_nritems(node); + + if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) { + btrfs_crit(root->fs_info, + "corrupt node: block %llu root %llu nritems %lu", + node->start, root->objectid, nr); + return -EIO; + } + return 0; +} + static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror) @@ -683,6 +696,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, ret = -EIO; } + if (found_level > 0 && check_node(root, eb)) + ret = -EIO; + if (!ret) set_extent_buffer_uptodate(eb); err: -- cgit v0.10.2 From 1ba98d086fe3a14d6a31f2f66dbab70c45d00f63 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 23 Aug 2016 15:22:58 -0700 Subject: Btrfs: detect corruption when non-root leaf has zero item Right now we treat leaf which has zero item as a valid one because we could have an empty tree, that is, a root that is also a leaf without any item, however, in the same case but when the leaf is not a root, we can end up with hitting the BUG_ON(1) in btrfs_extend_item() called by setup_inline_extent_backref(). This makes us check the situation as a corruption if leaf is not its own root. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 474209f..70e76ad 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -560,8 +560,29 @@ static noinline int check_leaf(struct btrfs_root *root, u32 nritems = btrfs_header_nritems(leaf); int slot; - if (nritems == 0) + if (nritems == 0) { + struct btrfs_root *check_root; + + key.objectid = btrfs_header_owner(leaf); + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + + check_root = btrfs_get_fs_root(root->fs_info, &key, false); + /* + * The only reason we also check NULL here is that during + * open_ctree() some roots has not yet been set up. + */ + if (!IS_ERR_OR_NULL(check_root)) { + /* if leaf is the root, then it's fine */ + if (leaf->start != + btrfs_root_bytenr(&check_root->root_item)) { + CORRUPT("non-root leaf's nritems is 0", + leaf, root, 0); + return -EIO; + } + } return 0; + } /* Check the 0 item */ if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != -- cgit v0.10.2 From 28a235931b56d4e7bdd51f6733daf95f2b269da8 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 23 Aug 2016 21:13:51 +0100 Subject: Btrfs: fix lockdep warning on deadlock against an inode's log mutex Commit 44f714dae50a ("Btrfs: improve performance on fsync against new inode after rename/unlink"), which landed in 4.8-rc2, introduced a possibility for a deadlock due to double locking of an inode's log mutex by the same task, which lockdep reports with: [23045.433975] ============================================= [23045.434748] [ INFO: possible recursive locking detected ] [23045.435426] 4.7.0-rc6-btrfs-next-34+ #1 Not tainted [23045.436044] --------------------------------------------- [23045.436044] xfs_io/3688 is trying to acquire lock: [23045.436044] (&ei->log_mutex){+.+...}, at: [] btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] but task is already holding lock: [23045.436044] (&ei->log_mutex){+.+...}, at: [] btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] other info that might help us debug this: [23045.436044] Possible unsafe locking scenario: [23045.436044] CPU0 [23045.436044] ---- [23045.436044] lock(&ei->log_mutex); [23045.436044] lock(&ei->log_mutex); [23045.436044] *** DEADLOCK *** [23045.436044] May be due to missing lock nesting notation [23045.436044] 3 locks held by xfs_io/3688: [23045.436044] #0: (&sb->s_type->i_mutex_key#15){+.+...}, at: [] btrfs_sync_file+0x14e/0x425 [btrfs] [23045.436044] #1: (sb_internal#2){.+.+.+}, at: [] __sb_start_write+0x5f/0xb0 [23045.436044] #2: (&ei->log_mutex){+.+...}, at: [] btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] stack backtrace: [23045.436044] CPU: 4 PID: 3688 Comm: xfs_io Not tainted 4.7.0-rc6-btrfs-next-34+ #1 [23045.436044] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 [23045.436044] 0000000000000000 ffff88022f5f7860 ffffffff8127074d ffffffff82a54b70 [23045.436044] ffffffff82a54b70 ffff88022f5f7920 ffffffff81092897 ffff880228015d68 [23045.436044] 0000000000000000 ffffffff82a54b70 ffffffff829c3f00 ffff880228015d68 [23045.436044] Call Trace: [23045.436044] [] dump_stack+0x67/0x90 [23045.436044] [] __lock_acquire+0xcbb/0xe4e [23045.436044] [] ? mark_lock+0x24/0x201 [23045.436044] [] ? mark_held_locks+0x5e/0x74 [23045.436044] [] lock_acquire+0x12f/0x1c3 [23045.436044] [] ? lock_acquire+0x12f/0x1c3 [23045.436044] [] ? btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] [] ? btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] [] mutex_lock_nested+0x77/0x3a7 [23045.436044] [] ? btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] [] ? btrfs_release_delayed_node+0xb/0xd [btrfs] [23045.436044] [] btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] [] ? btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] [] ? vprintk_emit+0x453/0x465 [23045.436044] [] btrfs_log_inode+0x66e/0xc95 [btrfs] [23045.436044] [] log_new_dir_dentries+0x26c/0x359 [btrfs] [23045.436044] [] btrfs_log_inode_parent+0x4a6/0x628 [btrfs] [23045.436044] [] btrfs_log_dentry_safe+0x5a/0x75 [btrfs] [23045.436044] [] btrfs_sync_file+0x304/0x425 [btrfs] [23045.436044] [] vfs_fsync_range+0x8c/0x9e [23045.436044] [] vfs_fsync+0x1c/0x1e [23045.436044] [] do_fsync+0x31/0x4a [23045.436044] [] SyS_fsync+0x10/0x14 [23045.436044] [] entry_SYSCALL_64_fastpath+0x18/0xa8 [23045.436044] [] ? trace_hardirqs_off_caller+0x3f/0xaa An example reproducer for this is: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ mkdir /mnt/dir $ touch /mnt/dir/foo $ sync $ mv /mnt/dir/foo /mnt/dir/bar $ touch /mnt/dir/foo $ xfs_io -c "fsync" /mnt/dir/bar This is because while logging the inode of file bar we end up logging its parent directory (since its inode has an unlink_trans field matching the current transaction id due to the rename operation), which in turn logs the inodes for all its new dentries, so that the new inode for the new file named foo gets logged which in turn triggered another logging attempt for the inode we are fsync'ing, since that inode had an old name that corresponds to the name of the new inode. So fix this by ensuring that when logging the inode for a new dentry that has a name matching an old name of some other inode, we don't log again the original inode that we are fsync'ing. Fixes: 44f714dae50a ("Btrfs: improve performance on fsync against new inode after rename/unlink") Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3391f2ad..fea31a4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2070,7 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } trans->sync = true; - btrfs_init_log_ctx(&ctx); + btrfs_init_log_ctx(&ctx, inode); ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); if (ret < 0) { diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ffe92da..e935035 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2823,7 +2823,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, */ mutex_unlock(&root->log_mutex); - btrfs_init_log_ctx(&root_log_ctx); + btrfs_init_log_ctx(&root_log_ctx, NULL); mutex_lock(&log_root_tree->log_mutex); atomic_inc(&log_root_tree->log_batch); @@ -4757,7 +4757,8 @@ again: if (ret < 0) { err = ret; goto out_unlock; - } else if (ret > 0) { + } else if (ret > 0 && ctx && + other_ino != btrfs_ino(ctx->inode)) { struct btrfs_key inode_key; struct inode *other_inode; diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index a9f1b75..ab858e3 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -30,15 +30,18 @@ struct btrfs_log_ctx { int log_transid; int io_err; bool log_new_dentries; + struct inode *inode; struct list_head list; }; -static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) +static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, + struct inode *inode) { ctx->log_ret = 0; ctx->log_transid = 0; ctx->io_err = 0; ctx->log_new_dentries = false; + ctx->inode = inode; INIT_LIST_HEAD(&ctx->list); } -- cgit v0.10.2 From 5bb53c0fb8e0fc2e34287d5d0fcadc784de913e1 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 23 Aug 2016 18:55:31 +0300 Subject: fs/block_dev: fix potential NULL ptr deref in freeze_bdev() Calling freeze_bdev() twice on the same block device without mounted filesystem get_super() will return NULL, which will lead to NULL-ptr dereference later in drop_super(). Check get_super() result to fix that. Note, that this is a purely theoretical issue. We have only 3 freeze_bdev() callers. 2 of them are in filesystem code and used on a device with mounted fs. The third one in lock_fs() has protection in upper-layer code against freezing block device the second time without thawing it first. Signed-off-by: Andrey Ryabinin Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/fs/block_dev.c b/fs/block_dev.c index e17bdbd..08ae993 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -249,7 +249,8 @@ struct super_block *freeze_bdev(struct block_device *bdev) * thaw_bdev drops it. */ sb = get_super(bdev); - drop_super(sb); + if (sb) + drop_super(sb); mutex_unlock(&bdev->bd_fsfreeze_mutex); return sb; } -- cgit v0.10.2 From 468c298ad3ed3f0d94a65f8ca00f6bfc6c2b4e33 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 25 Aug 2016 08:56:44 -0600 Subject: Revert "floppy: fix open(O_ACCMODE) for ioctl-only open" This reverts commit ff06db1efb2ad6db06eb5b99b88a0c15a9cc9b0e. diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index b71a9c7..c557057 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3663,6 +3663,11 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) opened_bdev[drive] = bdev; + if (!(mode & (FMODE_READ|FMODE_WRITE))) { + res = -EINVAL; + goto out; + } + res = -ENXIO; if (!floppy_track_buffer) { @@ -3706,15 +3711,13 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (UFDCS->rawcmd == 1) UFDCS->rawcmd = 2; - if (mode & (FMODE_READ|FMODE_WRITE)) { - UDRS->last_checked = 0; - clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); - check_disk_change(bdev); - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) - goto out; - if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) - goto out; - } + UDRS->last_checked = 0; + clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); + check_disk_change(bdev); + if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) + goto out; + if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) + goto out; res = -EROFS; -- cgit v0.10.2 From f2791e7eadf437633f30faa51b30878cf15650be Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 25 Aug 2016 08:56:51 -0600 Subject: Revert "floppy: refactor open() flags handling" This reverts commit 09954bad448791ef01202351d437abdd9497a804. diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index c557057..e3d8e4c 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3663,11 +3663,6 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) opened_bdev[drive] = bdev; - if (!(mode & (FMODE_READ|FMODE_WRITE))) { - res = -EINVAL; - goto out; - } - res = -ENXIO; if (!floppy_track_buffer) { @@ -3711,20 +3706,21 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (UFDCS->rawcmd == 1) UFDCS->rawcmd = 2; - UDRS->last_checked = 0; - clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); - check_disk_change(bdev); - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) - goto out; - if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) - goto out; - - res = -EROFS; - - if ((mode & FMODE_WRITE) && - !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags)) - goto out; - + if (!(mode & FMODE_NDELAY)) { + if (mode & (FMODE_READ|FMODE_WRITE)) { + UDRS->last_checked = 0; + clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); + check_disk_change(bdev); + if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) + goto out; + if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) + goto out; + } + res = -EROFS; + if ((mode & FMODE_WRITE) && + !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags)) + goto out; + } mutex_unlock(&open_lock); mutex_unlock(&floppy_mutex); return 0; -- cgit v0.10.2 From 07c72d7d54d138eb2ca37709a5a3d55fbcc01536 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Wed, 24 Aug 2016 13:59:17 -0500 Subject: i40iw: Send last streaming mode message for loopback connections Send a zero length last streaming mode message for loopback connections to synchronize between accepting QP and connecting QP. This avoids data transfer to start on the accepting QP before the connecting QP is in RTS. Also remove function i40iw_loopback_nop() as it is no longer used. Fixes: f27b4746f378 ("i40iw: add connection management code") Signed-off-by: Tatyana Nikolova Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 6434398..7ca0638 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -3347,26 +3347,6 @@ int i40iw_cm_disconn(struct i40iw_qp *iwqp) } /** - * i40iw_loopback_nop - Send a nop - * @qp: associated hw qp - */ -static void i40iw_loopback_nop(struct i40iw_sc_qp *qp) -{ - u64 *wqe; - u64 header; - - wqe = qp->qp_uk.sq_base->elem; - set_64bit_val(wqe, 0, 0); - set_64bit_val(wqe, 8, 0); - set_64bit_val(wqe, 16, 0); - - header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) | - LS_64(0, I40IWQPSQ_SIGCOMPL) | - LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID); - set_64bit_val(wqe, 24, header); -} - -/** * i40iw_qp_disconnect - free qp and close cm * @iwqp: associate qp for the connection */ @@ -3638,7 +3618,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } else { if (iwqp->page) iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page); - i40iw_loopback_nop(&iwqp->sc_qp); + dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0); } if (iwqp->page) -- cgit v0.10.2 From 90a56f72edb088c678083c32d05936c7c8d9a948 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 12 Aug 2016 15:11:28 +0300 Subject: Bluetooth: Fix bt_sock_recvmsg when MSG_TRUNC is not set Commit b5f34f9420b50c9b5876b9a2b68e96be6d629054 attempt to introduce proper handling for MSG_TRUNC but recv and variants should still work as read if no flag is passed, but because the code may set MSG_TRUNC to msg->msg_flags that shall not be used as it may cause it to be behave as if MSG_TRUNC is always, so instead of using it this changes the code to use the flags parameter which shall contain the original flags. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index ece45e0..0b5f729 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -250,7 +250,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_free_datagram(sk, skb); - if (msg->msg_flags & MSG_TRUNC) + if (flags & MSG_TRUNC) copied = skblen; return err ? : copied; -- cgit v0.10.2 From 4f34228b67246ae3b3ab1dc33b980c77c0650ef4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 15 Aug 2016 16:02:20 +0300 Subject: Bluetooth: Fix hci_sock_recvmsg when MSG_TRUNC is not set Similar to bt_sock_recvmsg MSG_TRUNC shall be checked using the original flags not msg_flags. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 6ef8a01..96f04b7 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1091,7 +1091,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, skb_free_datagram(sk, skb); - if (msg->msg_flags & MSG_TRUNC) + if (flags & MSG_TRUNC) copied = skblen; return err ? : copied; -- cgit v0.10.2 From 2b721f20770ccbca4d3dad58e1bd44aa570efb3f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 10 Aug 2016 18:52:38 +0200 Subject: drm: Protect fb_defio in drivers with CONFIG_KMS_FBDEV_EMULATION For reasons that entirely elude me fb.h exposes all the structures, even when it is not enabled. Except for special stuff like fb_defio. Which means all the drivers which haven't yet switched over to the defio support in the helpers and still roll their own, will fail to compile when fbdev emulation is disabled. Protect just those bits, as a gnarly reminder that conversion to the core defio helpers would be good. Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470847958-28465-6-git-send-email-daniel.vetter@ffwll.ch Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c index df26570..28c1423 100644 --- a/drivers/gpu/drm/qxl/qxl_fb.c +++ b/drivers/gpu/drm/qxl/qxl_fb.c @@ -73,10 +73,12 @@ static void qxl_fb_image_init(struct qxl_fb_image *qxl_fb_image, } } +#ifdef CONFIG_DRM_FBDEV_EMULATION static struct fb_deferred_io qxl_defio = { .delay = QXL_DIRTY_DELAY, .deferred_io = drm_fb_helper_deferred_io, }; +#endif static struct fb_ops qxlfb_ops = { .owner = THIS_MODULE, @@ -313,8 +315,10 @@ static int qxlfb_create(struct qxl_fbdev *qfbdev, goto out_destroy_fbi; } +#ifdef CONFIG_DRM_FBDEV_EMULATION info->fbdefio = &qxl_defio; fb_deferred_io_init(info); +#endif qdev->fbdev_info = info; qdev->fbdev_qfb = &qfbdev->qfb; diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index d5df555..9688bfa 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -203,6 +203,7 @@ static int udl_fb_open(struct fb_info *info, int user) ufbdev->fb_count++; +#ifdef CONFIG_DRM_FBDEV_EMULATION if (fb_defio && (info->fbdefio == NULL)) { /* enable defio at last moment if not disabled by client */ @@ -218,6 +219,7 @@ static int udl_fb_open(struct fb_info *info, int user) info->fbdefio = fbdefio; fb_deferred_io_init(info); } +#endif pr_notice("open /dev/fb%d user=%d fb_info=%p count=%d\n", info->node, user, info, ufbdev->fb_count); @@ -235,12 +237,14 @@ static int udl_fb_release(struct fb_info *info, int user) ufbdev->fb_count--; +#ifdef CONFIG_DRM_FBDEV_EMULATION if ((ufbdev->fb_count == 0) && (info->fbdefio)) { fb_deferred_io_cleanup(info); kfree(info->fbdefio); info->fbdefio = NULL; info->fbops->fb_mmap = udl_fb_mmap; } +#endif pr_warn("released /dev/fb%d user=%d count=%d\n", info->node, user, ufbdev->fb_count); -- cgit v0.10.2 From 869c554808ccf7ddd25be5317073b88ceddb8507 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 25 Aug 2016 14:11:43 -0600 Subject: mmc: fix use-after-free of struct request We call mmc_req_is_special() after having processed a request, but it could be freed after that. Check that ahead of time, and use the cached value. Reported-by: Hans de Goede Tested-by: Hans de Goede Fixes: c2df40dfb8c0 ("drivers: use req op accessor") Signed-off-by: Jens Axboe diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 82503e6..2206d44 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -2151,6 +2151,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) struct mmc_card *card = md->queue.card; struct mmc_host *host = card->host; unsigned long flags; + bool req_is_special = mmc_req_is_special(req); if (req && !mq->mqrq_prev->req) /* claim host only for the first request */ @@ -2191,8 +2192,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) } out: - if ((!req && !(mq->flags & MMC_QUEUE_NEW_REQUEST)) || - mmc_req_is_special(req)) + if ((!req && !(mq->flags & MMC_QUEUE_NEW_REQUEST)) || req_is_special) /* * Release host when there are no more requests * and after special request(discard, flush) is done. diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index 29578e9..7080572 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -65,6 +65,8 @@ static int mmc_queue_thread(void *d) spin_unlock_irq(q->queue_lock); if (req || mq->mqrq_prev->req) { + bool req_is_special = mmc_req_is_special(req); + set_current_state(TASK_RUNNING); mq->issue_fn(mq, req); cond_resched(); @@ -80,7 +82,7 @@ static int mmc_queue_thread(void *d) * has been finished. Do not assign it to previous * request. */ - if (mmc_req_is_special(req)) + if (req_is_special) mq->mqrq_cur->req = NULL; mq->mqrq_prev->brq.mrq.data = NULL; -- cgit v0.10.2 From a5de125dd46c851fc962806135953c1bd0a0f0df Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 24 Aug 2016 13:32:19 +0000 Subject: tipc: fix the error handling in tipc_udp_enable() Fix to return a negative error code in enable_mcast() error handling case, and release udp socket when necessary. Fixes: d0f91938bede ("tipc: add ip/udp media type") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index b016c01..ae7e14c 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -396,10 +396,13 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, tuncfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); - if (enable_mcast(ub, remote)) + err = enable_mcast(ub, remote); + if (err) goto err; return 0; err: + if (ub->ubsock) + udp_tunnel_sock_release(ub->ubsock); kfree(ub); return err; } -- cgit v0.10.2 From 166ee5b87866de07a3e56c1b757f2b5cabba72a5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Aug 2016 09:39:02 -0700 Subject: qdisc: fix a module refcount leak in qdisc_create_dflt() Should qdisc_alloc() fail, we must release the module refcount we got right before. Fixes: 6da7c8fcbcbd ("qdisc: allow setting default queuing discipline") Signed-off-by: Eric Dumazet Acked-by: John Fastabend Acked-by: John Fastabend Signed-off-by: David S. Miller diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e95b67c..657c133 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -643,18 +643,19 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, struct Qdisc *sch; if (!try_module_get(ops->owner)) - goto errout; + return NULL; sch = qdisc_alloc(dev_queue, ops); - if (IS_ERR(sch)) - goto errout; + if (IS_ERR(sch)) { + module_put(ops->owner); + return NULL; + } sch->parent = parentid; if (!ops->init || ops->init(sch, NULL) == 0) return sch; qdisc_destroy(sch); -errout: return NULL; } EXPORT_SYMBOL(qdisc_create_dflt); -- cgit v0.10.2 From 4f101c47791cdcb831b3ef1f831b1cc51e4fe03c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 24 Aug 2016 11:01:20 -0700 Subject: net: dsa: bcm_sf2: Fix race condition while unmasking interrupts We kept shadow copies of which interrupt sources we have enabled and disabled, but due to an order bug in how intrl2_mask_clear was defined, we could run into the following scenario: CPU0 CPU1 intrl2_1_mask_clear(..) sets INTRL2_CPU_MASK_CLEAR bcm_sf2_switch_1_isr read INTRL2_CPU_STATUS and masks with stale irq1_mask value updates irq1_mask value Which would make us loop again and again trying to process and interrupt we are not clearing since our copy of whether it was enabled before still indicates it was not. Fix this by updating the shadow copy first, and then unasking at the HW level. Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index 463bed8..dd446e4 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -205,8 +205,8 @@ static inline void name##_writeq(struct bcm_sf2_priv *priv, u64 val, \ static inline void intrl2_##which##_mask_clear(struct bcm_sf2_priv *priv, \ u32 mask) \ { \ - intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR); \ priv->irq##which##_mask &= ~(mask); \ + intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR); \ } \ static inline void intrl2_##which##_mask_set(struct bcm_sf2_priv *priv, \ u32 mask) \ -- cgit v0.10.2 From c3e70edd7c2eed6acd234627a6007627f5c76e8e Mon Sep 17 00:00:00 2001 From: Xander Huff Date: Wed, 24 Aug 2016 16:47:53 -0500 Subject: Revert "phy: IRQ cannot be shared" This reverts: commit 33c133cc7598 ("phy: IRQ cannot be shared") On hardware with multiple PHY devices hooked up to the same IRQ line, allow them to share it. Sergei Shtylyov says: "I'm not sure now what was the reason I concluded that the IRQ sharing was impossible... most probably I thought that the kernel IRQ handling code exited the loop over the IRQ actions once IRQ_HANDLED was returned -- which is obviously not so in reality..." Signed-off-by: Xander Huff Signed-off-by: Nathan Sullivan Signed-off-by: David S. Miller diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index c5dc2c36..c6f6683 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -722,8 +722,10 @@ phy_err: int phy_start_interrupts(struct phy_device *phydev) { atomic_set(&phydev->irq_disable, 0); - if (request_irq(phydev->irq, phy_interrupt, 0, "phy_interrupt", - phydev) < 0) { + if (request_irq(phydev->irq, phy_interrupt, + IRQF_SHARED, + "phy_interrupt", + phydev) < 0) { pr_warn("%s: Can't get IRQ %d (PHY)\n", phydev->mdio.bus->name, phydev->irq); phydev->irq = PHY_POLL; -- cgit v0.10.2 From f38ff2ee7727994685494bcc4d7c274b35b5418a Mon Sep 17 00:00:00 2001 From: Anjali Singhai Jain Date: Wed, 24 Aug 2016 17:51:53 -0700 Subject: i40e: Change some init flow for the client This change makes a common flow for Client instance open during init and reset path. The Client subtask can handle both the cases instead of making a separate notify_client_of_open call. Also it may fix a bug during reset where the service task was leaking some memory and causing issues. Change-Id: I7232a32fd52b82e863abb54266fa83122f80a0cd Signed-off-by: Anjali Singhai Jain Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index e1370c5..618f184 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -199,6 +199,7 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi) void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi) { struct i40e_client_instance *cdev; + int ret = 0; if (!vsi) return; @@ -211,7 +212,14 @@ void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi) "Cannot locate client instance open routine\n"); continue; } - cdev->client->ops->open(&cdev->lan_info, cdev->client); + if (!(test_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state))) { + ret = cdev->client->ops->open(&cdev->lan_info, + cdev->client); + if (!ret) + set_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state); + } } } mutex_unlock(&i40e_client_instance_mutex); @@ -407,12 +415,14 @@ struct i40e_vsi *i40e_vsi_lookup(struct i40e_pf *pf, * i40e_client_add_instance - add a client instance struct to the instance list * @pf: pointer to the board struct * @client: pointer to a client struct in the client list. + * @existing: if there was already an existing instance * - * Returns cdev ptr on success, NULL on failure + * Returns cdev ptr on success or if already exists, NULL on failure **/ static struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf, - struct i40e_client *client) + struct i40e_client *client, + bool *existing) { struct i40e_client_instance *cdev; struct netdev_hw_addr *mac = NULL; @@ -421,7 +431,7 @@ struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf, mutex_lock(&i40e_client_instance_mutex); list_for_each_entry(cdev, &i40e_client_instances, list) { if ((cdev->lan_info.pf == pf) && (cdev->client == client)) { - cdev = NULL; + *existing = true; goto out; } } @@ -505,6 +515,7 @@ void i40e_client_subtask(struct i40e_pf *pf) { struct i40e_client_instance *cdev; struct i40e_client *client; + bool existing = false; int ret = 0; if (!(pf->flags & I40E_FLAG_SERVICE_CLIENT_REQUESTED)) @@ -528,18 +539,25 @@ void i40e_client_subtask(struct i40e_pf *pf) /* check if L2 VSI is up, if not we are not ready */ if (test_bit(__I40E_DOWN, &pf->vsi[pf->lan_vsi]->state)) continue; + } else { + dev_warn(&pf->pdev->dev, "This client %s is being instanciated at probe\n", + client->name); } /* Add the client instance to the instance list */ - cdev = i40e_client_add_instance(pf, client); + cdev = i40e_client_add_instance(pf, client, &existing); if (!cdev) continue; - /* Also up the ref_cnt of no. of instances of this client */ - atomic_inc(&client->ref_cnt); - dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n", - client->name, pf->hw.pf_id, - pf->hw.bus.device, pf->hw.bus.func); + if (!existing) { + /* Also up the ref_cnt for no. of instances of this + * client. + */ + atomic_inc(&client->ref_cnt); + dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n", + client->name, pf->hw.pf_id, + pf->hw.bus.device, pf->hw.bus.func); + } /* Send an Open request to the client */ atomic_inc(&cdev->ref_cnt); @@ -588,7 +606,8 @@ int i40e_lan_add_device(struct i40e_pf *pf) pf->hw.pf_id, pf->hw.bus.device, pf->hw.bus.func); /* Since in some cases register may have happened before a device gets - * added, we can schedule a subtask to go initiate the clients. + * added, we can schedule a subtask to go initiate the clients if + * they can be launched at probe time. */ pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED; i40e_service_event_schedule(pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index c6ac7a6..828ed28 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5431,7 +5431,6 @@ int i40e_open(struct net_device *netdev) wr32(&pf->hw, I40E_GLLAN_TSOMSK_L, be32_to_cpu(TCP_FLAG_CWR) >> 16); udp_tunnel_get_rx_info(netdev); - i40e_notify_client_of_netdev_open(vsi); return 0; } -- cgit v0.10.2 From b628d611a2a53858263fc419dba552f32431dba4 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 25 Aug 2016 09:45:39 +0800 Subject: 8139cp: Fix one possible deadloop in cp_rx_poll When cp_rx_poll does not get enough packet, it will check the rx interrupt status again. If so, it will jumpt to rx_status_loop again. But the goto jump resets the rx variable as zero too. As a result, it causes one possible deadloop. Assume this case, rx_status_loop only gets the packet count which is less than budget, and (cpr16(IntrStatus) & cp_rx_intr_mask) condition is always true. It causes the deadloop happens and system is blocked. Signed-off-by: Gao Feng Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index deae10d..5297bf7 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -467,8 +467,8 @@ static int cp_rx_poll(struct napi_struct *napi, int budget) unsigned int rx_tail = cp->rx_tail; int rx; -rx_status_loop: rx = 0; +rx_status_loop: cpw16(IntrStatus, cp_rx_intr_mask); while (rx < budget) { -- cgit v0.10.2 From ed150e1a5cf20c04cf0b2d2c34e498fc1d6519be Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 26 Aug 2016 15:58:40 +1000 Subject: xfs: don't perform lookups on zero-height btrees If the caller passes in a cursor to a zero-height btree (which is impossible), we never set block to anything but NULL, which causes the later dereference of it to crash. Instead, just return -EFSCORRUPTED. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index b5c213a..33f1406 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -1814,6 +1814,10 @@ xfs_btree_lookup( XFS_BTREE_STATS_INC(cur, lookup); + /* No such thing as a zero-level tree. */ + if (cur->bc_nlevels == 0) + return -EFSCORRUPTED; + block = NULL; keyno = 0; -- cgit v0.10.2 From 738f57c16a2bb527c705641f0fc1c68ff8cba72a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 26 Aug 2016 15:59:19 +1000 Subject: xfs: disallow mounting of realtime + rmap filesystems Since the kernel doesn't currently support the realtime rmapbt, don't allow such filesystems to be mounted. Support will appear in a future release. Signed-off-by: Darrick J. Wong Reviewed-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 24ef83e..fd6be45 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1574,9 +1574,16 @@ xfs_fs_fill_super( } } - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { + if (mp->m_sb.sb_rblocks) { + xfs_alert(mp, + "EXPERIMENTAL reverse mapping btree not compatible with realtime device!"); + error = -EINVAL; + goto out_filestream_unmount; + } xfs_alert(mp, "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!"); + } error = xfs_mountfs(mp); if (error) -- cgit v0.10.2 From da1f039d6947b1a49f13b39a6de0df2a3e9e1ed1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 26 Aug 2016 15:59:31 +1000 Subject: xfs: don't log the entire end of the AGF When we're logging the last non-spare field in the AGF, we don't need to log the spare fields, so plumb in a new AGF logging flag to help us avoid that. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 3dd8f1d..05b5243 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2278,6 +2278,8 @@ xfs_alloc_log_agf( offsetof(xfs_agf_t, agf_btreeblks), offsetof(xfs_agf_t, agf_uuid), offsetof(xfs_agf_t, agf_rmap_blocks), + /* needed so that we don't log the whole rest of the structure: */ + offsetof(xfs_agf_t, agf_spare64), sizeof(xfs_agf_t) }; diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index e6a8bea..270fb5c 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -674,7 +674,8 @@ typedef struct xfs_agf { #define XFS_AGF_BTREEBLKS 0x00000800 #define XFS_AGF_UUID 0x00001000 #define XFS_AGF_RMAP_BLOCKS 0x00002000 -#define XFS_AGF_NUM_BITS 14 +#define XFS_AGF_SPARE64 0x00004000 +#define XFS_AGF_NUM_BITS 15 #define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) #define XFS_AGF_FLAGS \ @@ -691,7 +692,8 @@ typedef struct xfs_agf { { XFS_AGF_LONGEST, "LONGEST" }, \ { XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \ { XFS_AGF_UUID, "UUID" }, \ - { XFS_AGF_RMAP_BLOCKS, "RMAP_BLOCKS" } + { XFS_AGF_RMAP_BLOCKS, "RMAP_BLOCKS" }, \ + { XFS_AGF_SPARE64, "SPARE64" } /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) -- cgit v0.10.2 From 722278997bc964349e23e7061d541f8df3133a04 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 26 Aug 2016 15:59:50 +1000 Subject: xfs: fix some key handling problems in _btree_simple_query_range We only need the record's high key for the first record that we look at; for all records, we /definitely/ need the regular record key. Therefore, fix how the simple range query function gets its keys. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 33f1406..b70d9f9 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4563,10 +4563,10 @@ xfs_btree_simple_query_range( error = xfs_btree_get_rec(cur, &recp, &stat); if (error || !stat) break; - cur->bc_ops->init_high_key_from_rec(&rec_key, recp); /* Skip if high_key(rec) < low_key. */ if (firstrec) { + cur->bc_ops->init_high_key_from_rec(&rec_key, recp); firstrec = false; diff = cur->bc_ops->diff_two_keys(cur, low_key, &rec_key); @@ -4575,6 +4575,7 @@ xfs_btree_simple_query_range( } /* Stop if high_key < low_key(rec). */ + cur->bc_ops->init_key_from_rec(&rec_key, recp); diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key); if (diff > 0) break; -- cgit v0.10.2 From 5b5c2dbd3c9bcfa89fba9709c12ecc0a445c6e40 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 26 Aug 2016 16:00:10 +1000 Subject: xfs: simple btree query range should look right if LE lookup fails If the initial LOOKUP_LE in the simple query range fails to find anything, we should attempt to increment the btree cursor to see if there actually /are/ records for what we're trying to find. Without this patch, a bnobt range query of (0, $agsize) returns no results because the leftmost record never has a startblock of zero. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index b70d9f9..0856979 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4558,6 +4558,13 @@ xfs_btree_simple_query_range( if (error) goto out; + /* Nothing? See if there's anything to the right. */ + if (!stat) { + error = xfs_btree_increment(cur, 0, &stat); + if (error) + goto out; + } + while (stat) { /* Find the record. */ error = xfs_btree_get_rec(cur, &recp, &stat); -- cgit v0.10.2 From f3d7ebdeb2c297bd26272384e955033493ca291c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 26 Aug 2016 16:01:30 +1000 Subject: xfs: fix superblock inprogress check From inspection, the superblock sb_inprogress check is done in the verifier and triggered only for the primary superblock via a "bp->b_bn == XFS_SB_DADDR" check. Unfortunately, the primary superblock is an uncached buffer, and hence it is configured by xfs_buf_read_uncached() with: bp->b_bn = XFS_BUF_DADDR_NULL; /* always null for uncached buffers */ And so this check never triggers. Fix it. cc: Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 0e3d4f5..4aecc5f 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -583,7 +583,8 @@ xfs_sb_verify( * Only check the in progress field for the primary superblock as * mkfs.xfs doesn't clear it from secondary superblocks. */ - return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR, + return xfs_mount_validate_sb(mp, &sb, + bp->b_maps[0].bm_bn == XFS_SB_DADDR, check_version); } -- cgit v0.10.2 From 800b2694f890cc35a1bda63501fc71c94389d517 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 26 Aug 2016 16:01:59 +1000 Subject: xfs: prevent dropping ioend completions during buftarg wait xfs_wait_buftarg() waits for all pending I/O, drains the ioend completion workqueue and walks the LRU until all buffers in the cache have been released. This is traditionally an unmount operation` but the mechanism is also reused during filesystem freeze. xfs_wait_buftarg() invokes drain_workqueue() as part of the quiesce, which is intended more for a shutdown sequence in that it indicates to the queue that new operations are not expected once the drain has begun. New work jobs after this point result in a WARN_ON_ONCE() and are otherwise dropped. With filesystem freeze, however, read operations are allowed and can proceed during or after the workqueue drain. If such a read occurs during the drain sequence, the workqueue infrastructure complains about the queued ioend completion work item and drops it on the floor. As a result, the buffer remains on the LRU and the freeze never completes. Despite the fact that the overall buffer cache cleanup is not necessary during freeze, fix up this operation such that it is safe to invoke during non-unmount quiesce operations. Replace the drain_workqueue() call with flush_workqueue(), which runs a similar serialization on pending workqueue jobs without causing new jobs to be dropped. This is safe for unmount as unmount independently locks out new operations by the time xfs_wait_buftarg() is invoked. cc: Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 607cc29..b5b9bff 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1611,7 +1611,7 @@ xfs_wait_buftarg( */ while (percpu_counter_sum(&btp->bt_io_count)) delay(100); - drain_workqueue(btp->bt_mount->m_buf_workqueue); + flush_workqueue(btp->bt_mount->m_buf_workqueue); /* loop until there is nothing left on the lru list. */ while (list_lru_count(&btp->bt_lru)) { -- cgit v0.10.2 From 699e36e5b8e9f77b2be4c23f0b309e53be4b2880 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 23 Aug 2016 10:44:02 +0200 Subject: clocksource/drivers/timer-atmel-pit: Enable mck clock mck is needed to get the PIT working. Explicitly prepare_enable it instead of assuming it is enabled. This solves an issue where the system is freezing when the ETM/ETB drivers are enabled. Reported-by: Olivier Schonken Reviewed-by: Boris Brezillon Acked-by: Nicolas Ferre Signed-off-by: Alexandre Belloni Signed-off-by: Daniel Lezcano diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c index 1ffac0c..3494bc5 100644 --- a/drivers/clocksource/timer-atmel-pit.c +++ b/drivers/clocksource/timer-atmel-pit.c @@ -261,6 +261,12 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) return PTR_ERR(data->mck); } + ret = clk_prepare_enable(data->mck); + if (ret) { + pr_err("Unable to enable mck\n"); + return ret; + } + /* Get the interrupts property */ data->irq = irq_of_parse_and_map(node, 0); if (!data->irq) { -- cgit v0.10.2 From 4d0e701659aa869a445823149e172e23faa6edac Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski Date: Wed, 17 Aug 2016 12:22:33 +0200 Subject: drivers/clocksource/pistachio: Fix memory corruption in init Driver init code incorrectly uses the block base address and as a result clears clocksource structure's fields instead of the hardware registers. Commit 09a998201649 ("timekeeping: Lift clocksource cacheline restriction") has changed the offsets within pistachio_clocksource structure and what has previously gone unnoticed now leads to a kernel panic during boot. Signed-off-by: Marcin Nowakowski Signed-off-by: Daniel Lezcano diff --git a/drivers/clocksource/time-pistachio.c b/drivers/clocksource/time-pistachio.c index a7d9a08..a8e6c7d 100644 --- a/drivers/clocksource/time-pistachio.c +++ b/drivers/clocksource/time-pistachio.c @@ -202,10 +202,10 @@ static int __init pistachio_clksrc_of_init(struct device_node *node) rate = clk_get_rate(fast_clk); /* Disable irq's for clocksource usage */ - gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 0); - gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 1); - gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 2); - gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 3); + gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 0); + gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 1); + gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 2); + gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 3); /* Enable timer block */ writel(TIMER_ME_GLOBAL, pcs_gpt.base); -- cgit v0.10.2 From b53e7d000d9e6e9fd2c6eb6b82d2783c67fd599e Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 25 Aug 2016 14:26:59 +0800 Subject: clocksource/drivers/sun4i: Clear interrupts after stopping timer in probe function The bootloader (U-boot) sometimes uses this timer for various delays. It uses it as a ongoing counter, and does comparisons on the current counter value. The timer counter is never stopped. In some cases when the user interacts with the bootloader, or lets it idle for some time before loading Linux, the timer may expire, and an interrupt will be pending. This results in an unexpected interrupt when the timer interrupt is enabled by the kernel, at which point the event_handler isn't set yet. This results in a NULL pointer dereference exception, panic, and no way to reboot. Clear any pending interrupts after we stop the timer in the probe function to avoid this. Cc: stable@vger.kernel.org Signed-off-by: Chen-Yu Tsai Signed-off-by: Daniel Lezcano Acked-by: Maxime Ripard diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index 97669ee..c83452c 100644 --- a/drivers/clocksource/sun4i_timer.c +++ b/drivers/clocksource/sun4i_timer.c @@ -123,12 +123,16 @@ static struct clock_event_device sun4i_clockevent = { .set_next_event = sun4i_clkevt_next_event, }; +static void sun4i_timer_clear_interrupt(void) +{ + writel(TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_ST_REG); +} static irqreturn_t sun4i_timer_interrupt(int irq, void *dev_id) { struct clock_event_device *evt = (struct clock_event_device *)dev_id; - writel(0x1, timer_base + TIMER_IRQ_ST_REG); + sun4i_timer_clear_interrupt(); evt->event_handler(evt); return IRQ_HANDLED; @@ -208,6 +212,9 @@ static int __init sun4i_timer_init(struct device_node *node) /* Make sure timer is stopped before playing with interrupts */ sun4i_clkevt_time_stop(0); + /* clear timer0 interrupt */ + sun4i_timer_clear_interrupt(); + sun4i_clockevent.cpumask = cpu_possible_mask; sun4i_clockevent.irq = irq; -- cgit v0.10.2 From 079d37df3397d48aab0f014986c1b0a1ca6256aa Mon Sep 17 00:00:00 2001 From: Eric Ren Date: Thu, 25 Aug 2016 17:20:59 +0800 Subject: dlm: fix malfunction of dlm_tool caused by debugfs changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the current kernel, `dlm_tool lockdebug` fails as below: "dlm_tool lockdebug ED0BD86DCE724393918A1AE8FDBF1EE3 can't open /sys/kernel/debug/dlm/ED0BD86DCE724393918A1AE8FDBF1EE3: Operation not permitted" This is because table_open() depends on file->f_op to tell which seq_file ops should be passed down. But, the original file ops in file->f_op is replaced by "debugfs_full_proxy_file_operations" with commit 49d200deaa68 ("debugfs: prevent access to removed files' private data"). Currently, I can think up 2 solutions: 1st, replace debugfs_create_file() with debugfs_create_file_unsafe(); 2nd, make different table_open#() accordingly. The 1st one is neat, but I don't thoroughly understand its risk. Maybe someone has a better one. Signed-off-by: Eric Ren Signed-off-by: David Teigland diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index eea6491..466f7d6 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -607,20 +607,54 @@ static const struct file_operations format2_fops; static const struct file_operations format3_fops; static const struct file_operations format4_fops; -static int table_open(struct inode *inode, struct file *file) +static int table_open1(struct inode *inode, struct file *file) { struct seq_file *seq; - int ret = -1; + int ret; - if (file->f_op == &format1_fops) - ret = seq_open(file, &format1_seq_ops); - else if (file->f_op == &format2_fops) - ret = seq_open(file, &format2_seq_ops); - else if (file->f_op == &format3_fops) - ret = seq_open(file, &format3_seq_ops); - else if (file->f_op == &format4_fops) - ret = seq_open(file, &format4_seq_ops); + ret = seq_open(file, &format1_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + seq->private = inode->i_private; /* the dlm_ls */ + return 0; +} + +static int table_open2(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + + ret = seq_open(file, &format2_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + seq->private = inode->i_private; /* the dlm_ls */ + return 0; +} + +static int table_open3(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + + ret = seq_open(file, &format3_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + seq->private = inode->i_private; /* the dlm_ls */ + return 0; +} + +static int table_open4(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + ret = seq_open(file, &format4_seq_ops); if (ret) return ret; @@ -631,7 +665,7 @@ static int table_open(struct inode *inode, struct file *file) static const struct file_operations format1_fops = { .owner = THIS_MODULE, - .open = table_open, + .open = table_open1, .read = seq_read, .llseek = seq_lseek, .release = seq_release @@ -639,7 +673,7 @@ static const struct file_operations format1_fops = { static const struct file_operations format2_fops = { .owner = THIS_MODULE, - .open = table_open, + .open = table_open2, .read = seq_read, .llseek = seq_lseek, .release = seq_release @@ -647,7 +681,7 @@ static const struct file_operations format2_fops = { static const struct file_operations format3_fops = { .owner = THIS_MODULE, - .open = table_open, + .open = table_open3, .read = seq_read, .llseek = seq_lseek, .release = seq_release @@ -655,7 +689,7 @@ static const struct file_operations format3_fops = { static const struct file_operations format4_fops = { .owner = THIS_MODULE, - .open = table_open, + .open = table_open4, .read = seq_read, .llseek = seq_lseek, .release = seq_release -- cgit v0.10.2 From c234af5875ffeab39d5a2c4230a477a35987a484 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 25 Aug 2016 07:51:10 +0100 Subject: net: hns: dereference ppe_cb->ppe_common_cb if it is non-null ppe_cb->ppe_common_cb is being dereferenced before a null check is being made on it. If ppe_cb->ppe_common_cb is null then we end up with a null pointer dereference when assigning dsaf_dev. Fix this by moving the initialisation of dsaf_dev once we know ppe_cb->ppe_common_cb is OK to dereference. Signed-off-by: Colin Ian King Acked-by: Yisen Zhuang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index ff8b6a4..6ea8722 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -328,9 +328,10 @@ static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb) static void hns_ppe_uninit_hw(struct hns_ppe_cb *ppe_cb) { u32 port; - struct dsaf_device *dsaf_dev = ppe_cb->ppe_common_cb->dsaf_dev; if (ppe_cb->ppe_common_cb) { + struct dsaf_device *dsaf_dev = ppe_cb->ppe_common_cb->dsaf_dev; + port = ppe_cb->index; dsaf_dev->misc_op->ppe_srst(dsaf_dev, port, 0); } -- cgit v0.10.2 From c15e07b02bf0450bc8e60f2cc51cb42daa371417 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 25 Aug 2016 18:30:52 +0200 Subject: team: loadbalance: push lacpdus to exact delivery When team is in bridge and LACP is utilized, LACPDU packets are pushed to userspace using raw socket and there they are processed. However, since 8626c56c8279b, LACPDU skbs are dropped by bridge rx_handler so they never reach packet handlers in rx path. Fix this by explicity treat LACPDUs to be pushed to exact delivery in team rx_handler. Reported-by: Ido Schimmel Fixes: 8626c56c8279b ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index cdb19b3..b228bea 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -14,9 +14,23 @@ #include #include #include +#include #include #include +static rx_handler_result_t lb_receive(struct team *team, struct team_port *port, + struct sk_buff *skb) +{ + if (unlikely(skb->protocol == htons(ETH_P_SLOW))) { + /* LACPDU packets should go to exact delivery */ + const unsigned char *dest = eth_hdr(skb)->h_dest; + + if (is_link_local_ether_addr(dest) && dest[5] == 0x02) + return RX_HANDLER_EXACT; + } + return RX_HANDLER_ANOTHER; +} + struct lb_priv; typedef struct team_port *lb_select_tx_port_func_t(struct team *, @@ -652,6 +666,7 @@ static const struct team_mode_ops lb_mode_ops = { .port_enter = lb_port_enter, .port_leave = lb_port_leave, .port_disabled = lb_port_disabled, + .receive = lb_receive, .transmit = lb_transmit, }; -- cgit v0.10.2 From 101b29a204f87c99377faa53bd378f101ebb1824 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 25 Aug 2016 15:16:45 -0700 Subject: byteswap: don't use __builtin_bswap*() with sparse Although sparse declares __builtin_bswap*(), it can't actually do constant folding inside them (yet). As such, things like switch (protocol) { case htons(ETH_P_IP): break; } which we do all over the place cause sparse to warn that it expects a constant instead of a function call. Disable __HAVE_BUILTIN_BSWAP*__ if __CHECKER__ is defined to avoid this. Fixes: 7322dd755e7d ("byteswap: try to avoid __builtin_constant_p gcc bug") Link: http://lkml.kernel.org/r/1470914102-26389-1-git-send-email-johannes@sipsolutions.net Signed-off-by: Johannes Berg Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index e294939..8dbc892 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -242,7 +242,11 @@ */ #define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) -#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP +/* + * sparse (__CHECKER__) pretends to be gcc, but can't do constant + * folding in __builtin_bswap*() (yet), so don't set these for it. + */ +#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) && !defined(__CHECKER__) #if GCC_VERSION >= 40400 #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ @@ -250,7 +254,7 @@ #if GCC_VERSION >= 40800 #define __HAVE_BUILTIN_BSWAP16__ #endif -#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ +#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP && !__CHECKER__ */ #if GCC_VERSION >= 50000 #define KASAN_ABI_VERSION 4 -- cgit v0.10.2 From 8582fb59f7ee012f2b8118c2bb95168c1622a4c5 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Aug 2016 15:16:48 -0700 Subject: get_maintainer: quiet noisy implicit -f vcs_file_exists checking Checking command line filenames that are outside the git tree can emit a noisy and confusing message. Quiet that message by redirecting stderr. Verify that the command was executed successfully. Fixes: 4cad35a7ca69 ("get_maintainer.pl: reduce need for command-line option -f") Link: http://lkml.kernel.org/r/1970a1d2fecb258e384e2e4fdaacdc9ccf3e30a4.1470955439.git.joe@perches.com Signed-off-by: Joe Perches Reported-by: Wolfram Sang Tested-by: Wolfram Sang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 49a00d5..aed4511 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -2136,9 +2136,11 @@ sub vcs_file_exists { my $cmd = $VCS_cmds{"file_exists_cmd"}; $cmd =~ s/(\$\w+)/$1/eeg; # interpolate $cmd - + $cmd .= " 2>&1"; $exists = &{$VCS_cmds{"execute_cmd"}}($cmd); + return 0 if ($? != 0); + return $exists; } -- cgit v0.10.2 From e7d316a02f683864a12389f8808570e37fb90aa3 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Thu, 25 Aug 2016 15:16:51 -0700 Subject: sysctl: handle error writing UINT_MAX to u32 fields We have scripts which write to certain fields on 3.18 kernels but this seems to be failing on 4.4 kernels. An entry which we write to here is xfrm_aevent_rseqth which is u32. echo 4294967295 > /proc/sys/net/core/xfrm_aevent_rseqth Commit 230633d109e3 ("kernel/sysctl.c: detect overflows when converting to int") prevented writing to sysctl entries when integer overflow occurs. However, this does not apply to unsigned integers. Heinrich suggested that we introduce a new option to handle 64 bit limits and set min as 0 and max as UINT_MAX. This might not work as it leads to issues similar to __do_proc_doulongvec_minmax. Alternatively, we would need to change the datatype of the entry to 64 bit. static int __do_proc_doulongvec_minmax(void *data, struct ctl_table { i = (unsigned long *) data; //This cast is causing to read beyond the size of data (u32) vleft = table->maxlen / sizeof(unsigned long); //vleft is 0 because maxlen is sizeof(u32) which is lesser than sizeof(unsigned long) on x86_64. Introduce a new proc handler proc_douintvec. Individual proc entries will need to be updated to use the new handler. [akpm@linux-foundation.org: coding-style fixes] Fixes: 230633d109e3 ("kernel/sysctl.c:detect overflows when converting to int") Link: http://lkml.kernel.org/r/1471479806-5252-1-git-send-email-subashab@codeaurora.org Signed-off-by: Subash Abhinov Kasiviswanathan Cc: Heinrich Schuchardt Cc: Kees Cook Cc: "David S. Miller" Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 697e160..a4f7203 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -42,6 +42,8 @@ extern int proc_dostring(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_dointvec(struct ctl_table *, int, void __user *, size_t *, loff_t *); +extern int proc_douintvec(struct ctl_table *, int, + void __user *, size_t *, loff_t *); extern int proc_dointvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_dointvec_jiffies(struct ctl_table *, int, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b43d0b2..a13bbda 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2140,6 +2140,21 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, return 0; } +static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp, + int *valp, + int write, void *data) +{ + if (write) { + if (*negp) + return -EINVAL; + *valp = *lvalp; + } else { + unsigned int val = *valp; + *lvalp = (unsigned long)val; + } + return 0; +} + static const char proc_wspace_sep[] = { ' ', '\t', '\n' }; static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, @@ -2259,8 +2274,27 @@ static int do_proc_dointvec(struct ctl_table *table, int write, int proc_dointvec(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table,write,buffer,lenp,ppos, - NULL,NULL); + return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL); +} + +/** + * proc_douintvec - read a vector of unsigned integers + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @buffer: the user buffer + * @lenp: the size of the user buffer + * @ppos: file position + * + * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer + * values from/to the user buffer, treated as an ASCII string. + * + * Returns 0 on success. + */ +int proc_douintvec(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return do_proc_dointvec(table, write, buffer, lenp, ppos, + do_proc_douintvec_conv, NULL); } /* @@ -2858,6 +2892,12 @@ int proc_dointvec(struct ctl_table *table, int write, return -ENOSYS; } +int proc_douintvec(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} + int proc_dointvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -2903,6 +2943,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, * exception granted :-) */ EXPORT_SYMBOL(proc_dointvec); +EXPORT_SYMBOL(proc_douintvec); EXPORT_SYMBOL(proc_dointvec_jiffies); EXPORT_SYMBOL(proc_dointvec_minmax); EXPORT_SYMBOL(proc_dointvec_userhz_jiffies); -- cgit v0.10.2 From 804dd150468cfd920d92d4b3cf00536fedef3902 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 25 Aug 2016 15:16:57 -0700 Subject: soft_dirty: fix soft_dirty during THP split While adding proper userfaultfd_wp support with bits in pagetable and swap entry to avoid false positives WP userfaults through swap/fork/ KSM/etc, I've been adding a framework that mostly mirrors soft dirty. So I noticed in one place I had to add uffd_wp support to the pagetables that wasn't covered by soft_dirty and I think it should have. Example: in the THP migration code migrate_misplaced_transhuge_page() pmd_mkdirty is called unconditionally after mk_huge_pmd. entry = mk_huge_pmd(new_page, vma->vm_page_prot); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); That sets soft dirty too (it's a false positive for soft dirty, the soft dirty bit could be more finegrained and transfer the bit like uffd_wp will do.. pmd/pte_uffd_wp() enforces the invariant that when it's set pmd/pte_write is not set). However in the THP split there's no unconditional pmd_mkdirty after mk_huge_pmd and pte_swp_mksoft_dirty isn't called after the migration entry is created. The code sets the dirty bit in the struct page instead of setting it in the pagetable (which is fully equivalent as far as the real dirty bit is concerned, as the whole point of pagetable bits is to be eventually flushed out of to the page, but that is not equivalent for the soft-dirty bit that gets lost in translation). This was found by code review only and totally untested as I'm working to actually replace soft dirty and I don't have time to test potential soft dirty bugfixes as well :). Transfer the soft_dirty from pmd to pte during THP splits. This fix avoids losing the soft_dirty bit and avoids userland memory corruption in the checkpoint. Fixes: eef1b3ba053aa6 ("thp: implement split_huge_pmd()") Link: http://lkml.kernel.org/r/1471610515-30229-2-git-send-email-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Acked-by: Pavel Emelyanov Cc: "Kirill A. Shutemov" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2373f0a..2db2112 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1512,7 +1512,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, struct page *page; pgtable_t pgtable; pmd_t _pmd; - bool young, write, dirty; + bool young, write, dirty, soft_dirty; unsigned long addr; int i; @@ -1546,6 +1546,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, write = pmd_write(*pmd); young = pmd_young(*pmd); dirty = pmd_dirty(*pmd); + soft_dirty = pmd_soft_dirty(*pmd); pmdp_huge_split_prepare(vma, haddr, pmd); pgtable = pgtable_trans_huge_withdraw(mm, pmd); @@ -1562,6 +1563,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, swp_entry_t swp_entry; swp_entry = make_migration_entry(page + i, write); entry = swp_entry_to_pte(swp_entry); + if (soft_dirty) + entry = pte_swp_mksoft_dirty(entry); } else { entry = mk_pte(page + i, vma->vm_page_prot); entry = maybe_mkwrite(entry, vma); @@ -1569,6 +1572,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, entry = pte_wrprotect(entry); if (!young) entry = pte_mkold(entry); + if (soft_dirty) + entry = pte_mksoft_dirty(entry); } if (dirty) SetPageDirty(page + i); -- cgit v0.10.2 From ae6c33ba6e37eea3012fe2640b22400ef3f2d0f3 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Thu, 25 Aug 2016 15:17:00 -0700 Subject: printk: fix parsing of "brl=" option Commit bbeddf52adc1 ("printk: move braille console support into separate braille.[ch] files") moved the parsing of braille-related options into _braille_console_setup(), changing the type of variable str from char* to char**. In this commit, memcmp(str, "brl,", 4) was correctly updated to memcmp(*str, "brl,", 4) but not memcmp(str, "brl=", 4). Update the code to make "brl=" option work again and replace memcmp() with strncmp() to make the compiler able to detect such an issue. Fixes: bbeddf52adc1 ("printk: move braille console support into separate braille.[ch] files") Link: http://lkml.kernel.org/r/20160823165700.28952-1-nicolas.iooss_linux@m4x.org Signed-off-by: Nicolas Iooss Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c index 276762f..d5760c4 100644 --- a/kernel/printk/braille.c +++ b/kernel/printk/braille.c @@ -9,10 +9,10 @@ char *_braille_console_setup(char **str, char **brl_options) { - if (!memcmp(*str, "brl,", 4)) { + if (!strncmp(*str, "brl,", 4)) { *brl_options = ""; *str += 4; - } else if (!memcmp(str, "brl=", 4)) { + } else if (!strncmp(*str, "brl=", 4)) { *brl_options = *str + 4; *str = strchr(*brl_options, ','); if (!*str) -- cgit v0.10.2 From a5ff1b34e16c203397542d98c49c5c7783193946 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 25 Aug 2016 15:17:02 -0700 Subject: treewide: replace config_enabled() with IS_ENABLED() (2nd round) Commit 97f2645f358b ("tree-wide: replace config_enabled() with IS_ENABLED()") mostly killed config_enabled(), but some new users have appeared for v4.8-rc1. They are all used for a boolean option, so can be replaced with IS_ENABLED() safely. Link: http://lkml.kernel.org/r/1471970749-24867-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Acked-by: Kees Cook Acked-by: Peter Oberparleiter Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Ralf Baechle Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index ea0cd97..5f98759 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -164,7 +164,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; */ static inline unsigned long ___pa(unsigned long x) { - if (config_enabled(CONFIG_64BIT)) { + if (IS_ENABLED(CONFIG_64BIT)) { /* * For MIPS64 the virtual address may either be in one of * the compatibility segements ckseg0 or ckseg1, or it may @@ -173,7 +173,7 @@ static inline unsigned long ___pa(unsigned long x) return x < CKSEG0 ? XPHYSADDR(x) : CPHYSADDR(x); } - if (!config_enabled(CONFIG_EVA)) { + if (!IS_ENABLED(CONFIG_EVA)) { /* * We're using the standard MIPS32 legacy memory map, ie. * the address x is going to be in kseg0 or kseg1. We can diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index ba5f456..7f7ba5f2 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -204,11 +204,9 @@ static void __init conmode_default(void) #endif } } else if (MACHINE_IS_KVM) { - if (sclp.has_vt220 && - config_enabled(CONFIG_SCLP_VT220_CONSOLE)) + if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE)) SET_CONSOLE_VT220; - else if (sclp.has_linemode && - config_enabled(CONFIG_SCLP_CONSOLE)) + else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE)) SET_CONSOLE_SCLP; else SET_CONSOLE_HVC; diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index ec8654f..bda8d5e 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -77,7 +77,7 @@ static inline unsigned long get_padding(struct kaslr_memory_region *region) */ static inline bool kaslr_memory_enabled(void) { - return kaslr_enabled() && !config_enabled(CONFIG_KASAN); + return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN); } /* Initialize base and padding for each memory region randomized with KASLR */ -- cgit v0.10.2 From b32eaf71db6085f2ba54cf3ddf688bfc858219d0 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 25 Aug 2016 15:17:05 -0700 Subject: mm: clarify COMPACTION Kconfig text The current wording of the COMPACTION Kconfig help text doesn't emphasise that disabling COMPACTION might cripple the page allocator which relies on the compaction quite heavily for high order requests and an unexpected OOM can happen with the lack of compaction. Make sure we are vocal about that. Link: http://lkml.kernel.org/r/20160823091726.GK23577@dhcp22.suse.cz Signed-off-by: Michal Hocko Cc: Markus Trippelsdorf Cc: Mel Gorman Cc: Joonsoo Kim Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/Kconfig b/mm/Kconfig index 78a23c5..be0ee11 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -262,7 +262,14 @@ config COMPACTION select MIGRATION depends on MMU help - Allows the compaction of memory for the allocation of huge pages. + Compaction is the only memory management component to form + high order (larger physically contiguous) memory blocks + reliably. The page allocator relies on compaction heavily and + the lack of the feature can lead to unexpected OOM killer + invocations for high order memory requests. You shouldn't + disable this option unless there really is a strong reason for + it and then we would be really interested to hear about that at + linux-mm@kvack.org. # # support for page migration -- cgit v0.10.2 From 358c07fcc3b60ab08d77f1684de8bd81bcf49a1a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 25 Aug 2016 15:17:08 -0700 Subject: mm: memcontrol: avoid unused function warning A bugfix in v4.8-rc2 introduced a harmless warning when CONFIG_MEMCG_SWAP is disabled but CONFIG_MEMCG is enabled: mm/memcontrol.c:4085:27: error: 'mem_cgroup_id_get_online' defined but not used [-Werror=unused-function] static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) This moves the function inside of the #ifdef block that hides the calling function, to avoid the warning. Fixes: 1f47b61fb407 ("mm: memcontrol: fix swap counter leak on swapout from offline cgroup") Link: http://lkml.kernel.org/r/20160824113733.2776701-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Michal Hocko Acked-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2ff0289..9a6a51a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4082,24 +4082,6 @@ static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n) atomic_add(n, &memcg->id.ref); } -static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) -{ - while (!atomic_inc_not_zero(&memcg->id.ref)) { - /* - * The root cgroup cannot be destroyed, so it's refcount must - * always be >= 1. - */ - if (WARN_ON_ONCE(memcg == root_mem_cgroup)) { - VM_BUG_ON(1); - break; - } - memcg = parent_mem_cgroup(memcg); - if (!memcg) - memcg = root_mem_cgroup; - } - return memcg; -} - static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n) { if (atomic_sub_and_test(n, &memcg->id.ref)) { @@ -5821,6 +5803,24 @@ static int __init mem_cgroup_init(void) subsys_initcall(mem_cgroup_init); #ifdef CONFIG_MEMCG_SWAP +static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) +{ + while (!atomic_inc_not_zero(&memcg->id.ref)) { + /* + * The root cgroup cannot be destroyed, so it's refcount must + * always be >= 1. + */ + if (WARN_ON_ONCE(memcg == root_mem_cgroup)) { + VM_BUG_ON(1); + break; + } + memcg = parent_mem_cgroup(memcg); + if (!memcg) + memcg = root_mem_cgroup; + } + return memcg; +} + /** * mem_cgroup_swapout - transfer a memsw charge to swap * @page: page whose memsw charge to transfer -- cgit v0.10.2 From 088bf2ff5d12e2e32ee52a4024fec26e582f44d3 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 25 Aug 2016 15:17:11 -0700 Subject: fs/seq_file: fix out-of-bounds read seq_read() is a nasty piece of work, not to mention buggy. It has (I think) an old bug which allows unprivileged userspace to read beyond the end of m->buf. I was getting these: BUG: KASAN: slab-out-of-bounds in seq_read+0xcd2/0x1480 at addr ffff880116889880 Read of size 2713 by task trinity-c2/1329 CPU: 2 PID: 1329 Comm: trinity-c2 Not tainted 4.8.0-rc1+ #96 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 Call Trace: kasan_object_err+0x1c/0x80 kasan_report_error+0x2cb/0x7e0 kasan_report+0x4e/0x80 check_memory_region+0x13e/0x1a0 kasan_check_read+0x11/0x20 seq_read+0xcd2/0x1480 proc_reg_read+0x10b/0x260 do_loop_readv_writev.part.5+0x140/0x2c0 do_readv_writev+0x589/0x860 vfs_readv+0x7b/0xd0 do_readv+0xd8/0x2c0 SyS_readv+0xb/0x10 do_syscall_64+0x1b3/0x4b0 entry_SYSCALL64_slow_path+0x25/0x25 Object at ffff880116889100, in cache kmalloc-4096 size: 4096 Allocated: PID = 1329 save_stack_trace+0x26/0x80 save_stack+0x46/0xd0 kasan_kmalloc+0xad/0xe0 __kmalloc+0x1aa/0x4a0 seq_buf_alloc+0x35/0x40 seq_read+0x7d8/0x1480 proc_reg_read+0x10b/0x260 do_loop_readv_writev.part.5+0x140/0x2c0 do_readv_writev+0x589/0x860 vfs_readv+0x7b/0xd0 do_readv+0xd8/0x2c0 SyS_readv+0xb/0x10 do_syscall_64+0x1b3/0x4b0 return_from_SYSCALL_64+0x0/0x6a Freed: PID = 0 (stack is not available) Memory state around the buggy address: ffff88011688a000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88011688a080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff88011688a100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff88011688a180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88011688a200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Disabling lock debugging due to kernel taint This seems to be the same thing that Dave Jones was seeing here: https://lkml.org/lkml/2016/8/12/334 There are multiple issues here: 1) If we enter the function with a non-empty buffer, there is an attempt to flush it. But it was not clearing m->from after doing so, which means that if we try to do this flush twice in a row without any call to traverse() in between, we are going to be reading from the wrong place -- the splat above, fixed by this patch. 2) If there's a short write to userspace because of page faults, the buffer may already contain multiple lines (i.e. pos has advanced by more than 1), but we don't save the progress that was made so the next call will output what we've already returned previously. Since that is a much less serious issue (and I have a headache after staring at seq_read() for the past 8 hours), I'll leave that for now. Link: http://lkml.kernel.org/r/1471447270-32093-1-git-send-email-vegard.nossum@oracle.com Signed-off-by: Vegard Nossum Reported-by: Dave Jones Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/seq_file.c b/fs/seq_file.c index 19f532e..6dc4296 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -223,8 +223,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) size -= n; buf += n; copied += n; - if (!m->count) + if (!m->count) { + m->from = 0; m->index++; + } if (!size) goto Done; } -- cgit v0.10.2 From d0e5845561c238619de9f5b77e0d763f4c331ca5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 25 Aug 2016 15:17:14 -0700 Subject: dax: fix device-dax region base The data offset for a dax region needs to account for a reservation in the resource range. Otherwise, device-dax is allowing mappings directly into the memmap or device-info-block area with crash signatures like the following: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: get_zone_device_page+0x11/0x30 Call Trace: follow_devmap_pmd+0x298/0x2c0 follow_page_mask+0x275/0x530 __get_user_pages+0xe3/0x750 __gfn_to_pfn_memslot+0x1b2/0x450 [kvm] tdp_page_fault+0x130/0x280 [kvm] kvm_mmu_page_fault+0x5f/0xf0 [kvm] handle_ept_violation+0x94/0x180 [kvm_intel] vmx_handle_exit+0x1d3/0x1440 [kvm_intel] kvm_arch_vcpu_ioctl_run+0x81d/0x16a0 [kvm] kvm_vcpu_ioctl+0x33c/0x620 [kvm] do_vfs_ioctl+0xa2/0x5d0 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x1a/0xa4 Fixes: ab68f2622136 ("/dev/dax, pmem: direct access to persistent memory") Link: http://lkml.kernel.org/r/147205536732.1606.8994275381938837346.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reported-by: Abhilash Kumar Mulumudi Reported-by: Toshi Kani Tested-by: Toshi Kani Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c index dfb1685..1f01e98 100644 --- a/drivers/dax/pmem.c +++ b/drivers/dax/pmem.c @@ -116,6 +116,9 @@ static int dax_pmem_probe(struct device *dev) if (rc) return rc; + /* adjust the dax_region resource to the start of data */ + res.start += le64_to_cpu(pfn_sb->dataoff); + nd_region = to_nd_region(dev->parent); dax_region = alloc_dax_region(dev, nd_region->id, &res, le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP); -- cgit v0.10.2 From 11bd969fdefea3ac0cb9791224f1e09784e21e58 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Thu, 25 Aug 2016 15:17:17 -0700 Subject: mm: silently skip readahead for DAX inodes For DAX inodes we need to be careful to never have page cache pages in the mapping->page_tree. This radix tree should be composed only of DAX exceptional entries and zero pages. ltp's readahead02 test was triggering a warning because we were trying to insert a DAX exceptional entry but found that a page cache page had already been inserted into the tree. This page was being inserted into the radix tree in response to a readahead(2) call. Readahead doesn't make sense for DAX inodes, but we don't want it to report a failure either. Instead, we just return success and don't do any work. Link: http://lkml.kernel.org/r/20160824221429.21158-1-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reported-by: Jeff Moyer Cc: Dan Williams Cc: Dave Chinner Cc: Dave Hansen Cc: Jan Kara Cc: [4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/readahead.c b/mm/readahead.c index 65ec288..c8a955b 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -544,6 +545,14 @@ do_readahead(struct address_space *mapping, struct file *filp, if (!mapping || !mapping->a_ops) return -EINVAL; + /* + * Readahead doesn't make sense for DAX inodes, but we don't want it + * to report a failure either. Instead, we just return success and + * don't do any work. + */ + if (dax_mapping(mapping)) + return 0; + return force_page_cache_readahead(mapping, filp, index, nr); } -- cgit v0.10.2 From e70c70c38d7a5ced76fc8b1c4a7ccee76e9c2911 Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Fri, 26 Aug 2016 11:19:34 +0100 Subject: sfc: fix potential stack corruption from running past stat bitmask On 32-bit systems, mask is only an array of 3 longs, not 4, so don't try to write to mask[3]. Also include build-time checks in case the size of the bitmask changes. Fixes: 3c36a2aded8c ("sfc: display vadaptor statistics for all interfaces") Signed-off-by: Edward Cree Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index f658fee..e00a669 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1517,13 +1517,14 @@ static void efx_ef10_get_stat_mask(struct efx_nic *efx, unsigned long *mask) } #if BITS_PER_LONG == 64 + BUILD_BUG_ON(BITS_TO_LONGS(EF10_STAT_COUNT) != 2); mask[0] = raw_mask[0]; mask[1] = raw_mask[1]; #else + BUILD_BUG_ON(BITS_TO_LONGS(EF10_STAT_COUNT) != 3); mask[0] = raw_mask[0] & 0xffffffff; mask[1] = raw_mask[0] >> 32; mask[2] = raw_mask[1] & 0xffffffff; - mask[3] = raw_mask[1] >> 32; #endif } -- cgit v0.10.2 From 9dbeea7f08f3784b152d9fb3b86beb34aad77c72 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Aug 2016 08:51:39 -0700 Subject: rhashtable: fix a memory leak in alloc_bucket_locks() If vmalloc() was successful, do not attempt a kmalloc_array() Fixes: 4cf0b354d92e ("rhashtable: avoid large lock-array allocations") Reported-by: CAI Qian Signed-off-by: Eric Dumazet Cc: Florian Westphal Acked-by: Herbert Xu Tested-by: CAI Qian Signed-off-by: David S. Miller diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 5ba520b..56054e5 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -77,17 +77,18 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, size = min_t(unsigned int, size, tbl->size >> 1); if (sizeof(spinlock_t) != 0) { + tbl->locks = NULL; #ifdef CONFIG_NUMA if (size * sizeof(spinlock_t) > PAGE_SIZE && gfp == GFP_KERNEL) tbl->locks = vmalloc(size * sizeof(spinlock_t)); - else #endif if (gfp != GFP_KERNEL) gfp |= __GFP_NOWARN | __GFP_NORETRY; - tbl->locks = kmalloc_array(size, sizeof(spinlock_t), - gfp); + if (!tbl->locks) + tbl->locks = kmalloc_array(size, sizeof(spinlock_t), + gfp); if (!tbl->locks) return -ENOMEM; for (i = 0; i < size; i++) -- cgit v0.10.2 From f5b7b559e14881b27d76f9c97817ec82bfc48827 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 24 Aug 2016 12:25:56 +0300 Subject: nvme-rdma: Get rid of duplicate variable We already have need_inval in ib_mr, lets use that instead. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c133256..881ac28 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -77,7 +77,6 @@ struct nvme_rdma_request { u32 num_sge; int nents; bool inline_data; - bool need_inval; struct ib_reg_wr reg_wr; struct ib_cqe reg_cqe; struct nvme_rdma_queue *queue; @@ -286,7 +285,7 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); int ret = 0; - if (!req->need_inval) + if (!req->mr->need_inval) goto out; ib_dereg_mr(req->mr); @@ -298,7 +297,7 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq) req->mr = NULL; } - req->need_inval = false; + req->mr->need_inval = false; out: return ret; @@ -850,7 +849,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, if (!blk_rq_bytes(rq)) return; - if (req->need_inval) { + if (req->mr->need_inval) { res = nvme_rdma_inv_rkey(queue, req); if (res < 0) { dev_err(ctrl->ctrl.device, @@ -936,7 +935,7 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; - req->need_inval = true; + req->mr->need_inval = true; sg->addr = cpu_to_le64(req->mr->iova); put_unaligned_le24(req->mr->length, sg->length); @@ -959,7 +958,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, req->num_sge = 1; req->inline_data = false; - req->need_inval = false; + req->mr->need_inval = false; c->common.flags |= NVME_CMD_SGL_METABUF; @@ -1146,7 +1145,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) && wc->ex.invalidate_rkey == req->mr->rkey) - req->need_inval = false; + req->mr->need_inval = false; blk_mq_complete_request(rq, status); @@ -1476,7 +1475,7 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, if (rq->cmd_type == REQ_TYPE_FS && req_op(rq) == REQ_OP_FLUSH) flush = true; ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, - req->need_inval ? &req->reg_wr.wr : NULL, flush); + req->mr->need_inval ? &req->reg_wr.wr : NULL, flush); if (ret) { nvme_rdma_unmap_data(queue, rq); goto err; -- cgit v0.10.2 From 4d8c6a7946d53648d9ed0e3852a1c81ce07d40db Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 26 Aug 2016 00:37:52 +0300 Subject: nvme-rdma: Get rid of redundant defines Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 881ac28..ab545fb 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -43,10 +43,6 @@ #define NVME_RDMA_MAX_INLINE_SEGMENTS 1 -#define NVME_RDMA_MAX_PAGES_PER_MR 512 - -#define NVME_RDMA_DEF_RECONNECT_DELAY 20 - /* * We handle AEN commands ourselves and don't even let the * block layer know about them. -- cgit v0.10.2 From 89f82cbb0d5c0ab768c8d02914188aa2211cd2e3 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 22 Aug 2016 15:15:23 -0400 Subject: drm/msm: fix use of copy_from_user() while holding spinlock Use instead __copy_from_user_inatomic() and fallback to slow-path where we drop and re-aquire the lock in case of fault. Cc: stable@vger.kernel.org Reported-by: Vaishali Thakkar Signed-off-by: Rob Clark diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 9766f9a..408da40 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -64,6 +64,14 @@ void msm_gem_submit_free(struct msm_gem_submit *submit) kfree(submit); } +static inline unsigned long __must_check +copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n)) + return __copy_from_user_inatomic(to, from, n); + return -EFAULT; +} + static int submit_lookup_objects(struct msm_gem_submit *submit, struct drm_msm_gem_submit *args, struct drm_file *file) { @@ -71,6 +79,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, int ret = 0; spin_lock(&file->table_lock); + pagefault_disable(); for (i = 0; i < args->nr_bos; i++) { struct drm_msm_gem_submit_bo submit_bo; @@ -84,10 +93,15 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, */ submit->bos[i].flags = 0; - ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo)); - if (ret) { - ret = -EFAULT; - goto out_unlock; + ret = copy_from_user_inatomic(&submit_bo, userptr, sizeof(submit_bo)); + if (unlikely(ret)) { + pagefault_enable(); + spin_unlock(&file->table_lock); + ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo)); + if (ret) + goto out; + spin_lock(&file->table_lock); + pagefault_disable(); } if (submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) { @@ -127,9 +141,12 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, } out_unlock: - submit->nr_bos = i; + pagefault_enable(); spin_unlock(&file->table_lock); +out: + submit->nr_bos = i; + return ret; } -- cgit v0.10.2 From d78d383ab354b0b9e1d23404ae0d9fbdeb9aa035 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 22 Aug 2016 15:28:38 -0400 Subject: drm/msm: protect against faults from copy_from_user() in submit ioctl An evil userspace could try to cause deadlock by passing an unfaulted-in GEM bo as submit->bos (or submit->cmds) table. Which will trigger msm_gem_fault() while we already hold struct_mutex. See: https://github.com/freedreno/msmtest/blob/master/evilsubmittest.c Cc: stable@vger.kernel.org Signed-off-by: Rob Clark diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index b4bc7f1..d0da52f 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -157,6 +157,12 @@ struct msm_drm_private { struct shrinker shrinker; struct msm_vblank_ctrl vblank_ctrl; + + /* task holding struct_mutex.. currently only used in submit path + * to detect and reject faults from copy_from_user() for submit + * ioctl. + */ + struct task_struct *struct_mutex_task; }; struct msm_format { diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 6cd4af4..85f3047 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -196,11 +196,20 @@ int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct drm_gem_object *obj = vma->vm_private_data; struct drm_device *dev = obj->dev; + struct msm_drm_private *priv = dev->dev_private; struct page **pages; unsigned long pfn; pgoff_t pgoff; int ret; + /* This should only happen if userspace tries to pass a mmap'd + * but unfaulted gem bo vaddr into submit ioctl, triggering + * a page fault while struct_mutex is already held. This is + * not a valid use-case so just bail. + */ + if (priv->struct_mutex_task == current) + return VM_FAULT_SIGBUS; + /* Make sure we don't parallel update on a fault, nor move or remove * something from beneath our feet */ diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 408da40..880d6a9 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -394,6 +394,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret) return ret; + priv->struct_mutex_task = current; + submit = submit_create(dev, gpu, args->nr_bos, args->nr_cmds); if (!submit) { ret = -ENOMEM; @@ -485,6 +487,7 @@ out: if (ret) msm_gem_submit_free(submit); out_unlock: + priv->struct_mutex_task = NULL; mutex_unlock(&dev->struct_mutex); return ret; } -- cgit v0.10.2 From e09c978aae5bedfdb379be80363b024b7d82638b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 27 Aug 2016 23:44:04 -0400 Subject: NFSv4.1: Fix Oopsable condition in server callback races The slot table hasn't been an array since v3.7. Ensure that we use nfs4_lookup_slot() to access the slot correctly. Fixes: 87dda67e7386 ("NFSv4.1: Allow SEQUENCE to resize the slot table...") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v3.8+ diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index c92a75e..a4cf6d2 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -454,11 +454,8 @@ static bool referring_call_exists(struct nfs_client *clp, ((u32 *)&rclist->rcl_sessionid.data)[3], ref->rc_sequenceid, ref->rc_slotid); - spin_lock(&tbl->slot_tbl_lock); - status = (test_bit(ref->rc_slotid, tbl->used_slots) && - tbl->slots[ref->rc_slotid].seq_nr == + status = nfs4_slot_seqid_in_use(tbl, ref->rc_slotid, ref->rc_sequenceid); - spin_unlock(&tbl->slot_tbl_lock); if (status) goto out; } diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 332d06e..c1f4c20 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -172,6 +172,39 @@ struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid) return ERR_PTR(-E2BIG); } +static int nfs4_slot_get_seqid(struct nfs4_slot_table *tbl, u32 slotid, + u32 *seq_nr) + __must_hold(&tbl->slot_tbl_lock) +{ + struct nfs4_slot *slot; + + slot = nfs4_lookup_slot(tbl, slotid); + if (IS_ERR(slot)) + return PTR_ERR(slot); + *seq_nr = slot->seq_nr; + return 0; +} + +/* + * nfs4_slot_seqid_in_use - test if a slot sequence id is still in use + * + * Given a slot table, slot id and sequence number, determine if the + * RPC call in question is still in flight. This function is mainly + * intended for use by the callback channel. + */ +bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, u32 slotid, u32 seq_nr) +{ + u32 cur_seq; + bool ret = false; + + spin_lock(&tbl->slot_tbl_lock); + if (nfs4_slot_get_seqid(tbl, slotid, &cur_seq) == 0 && + cur_seq == seq_nr && test_bit(slotid, tbl->used_slots)) + ret = true; + spin_unlock(&tbl->slot_tbl_lock); + return ret; +} + /* * nfs4_alloc_slot - efficiently look for a free slot * diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 5b51298..33cace6 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -78,6 +78,7 @@ extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl); extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); extern struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid); +extern bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, u32 slotid, u32 seq_nr); extern bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); -- cgit v0.10.2 From 045d2a6d076a2ecd7043ea543ea198af943f8b16 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 28 Aug 2016 13:25:43 -0400 Subject: NFSv4.1: Delay callback processing when there are referring triples If CB_SEQUENCE tells us that the processing of this request depends on the completion of one or more referring triples (see RFC 5661 Section 2.10.6.3), delay the callback processing until after the RPC requests being referred to have completed. If we end up delaying for more than 1/2 second, then fall back to returning NFS4ERR_DELAY in reply to the callback. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index a4cf6d2..c359329 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -454,8 +454,8 @@ static bool referring_call_exists(struct nfs_client *clp, ((u32 *)&rclist->rcl_sessionid.data)[3], ref->rc_sequenceid, ref->rc_slotid); - status = nfs4_slot_seqid_in_use(tbl, ref->rc_slotid, - ref->rc_sequenceid); + status = nfs4_slot_wait_on_seqid(tbl, ref->rc_slotid, + ref->rc_sequenceid, HZ >> 1) < 0; if (status) goto out; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1949bbd..0cc0c31 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -686,6 +686,8 @@ out_unlock: res->sr_slot = NULL; if (send_new_highest_used_slotid) nfs41_notify_server(session->clp); + if (waitqueue_active(&tbl->slot_waitq)) + wake_up_all(&tbl->slot_waitq); } int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index c1f4c20..b629730 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -28,6 +28,7 @@ static void nfs4_init_slot_table(struct nfs4_slot_table *tbl, const char *queue) tbl->highest_used_slotid = NFS4_NO_SLOT; spin_lock_init(&tbl->slot_tbl_lock); rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, queue); + init_waitqueue_head(&tbl->slot_waitq); init_completion(&tbl->complete); } @@ -192,7 +193,8 @@ static int nfs4_slot_get_seqid(struct nfs4_slot_table *tbl, u32 slotid, * RPC call in question is still in flight. This function is mainly * intended for use by the callback channel. */ -bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, u32 slotid, u32 seq_nr) +static bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, + u32 slotid, u32 seq_nr) { u32 cur_seq; bool ret = false; @@ -206,6 +208,24 @@ bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, u32 slotid, u32 seq_nr) } /* + * nfs4_slot_wait_on_seqid - wait until a slot sequence id is complete + * + * Given a slot table, slot id and sequence number, wait until the + * corresponding RPC call completes. This function is mainly + * intended for use by the callback channel. + */ +int nfs4_slot_wait_on_seqid(struct nfs4_slot_table *tbl, + u32 slotid, u32 seq_nr, + unsigned long timeout) +{ + if (wait_event_timeout(tbl->slot_waitq, + !nfs4_slot_seqid_in_use(tbl, slotid, seq_nr), + timeout) == 0) + return -ETIMEDOUT; + return 0; +} + +/* * nfs4_alloc_slot - efficiently look for a free slot * * nfs4_alloc_slot looks for an unset bit in the used_slots bitmap. diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 33cace6..fa75d7d 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -36,6 +36,7 @@ struct nfs4_slot_table { unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */ spinlock_t slot_tbl_lock; struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */ + wait_queue_head_t slot_waitq; /* Completion wait on slot */ u32 max_slots; /* # slots in table */ u32 max_slotid; /* Max allowed slotid value */ u32 highest_used_slotid; /* sent to server on each SEQ. @@ -78,7 +79,9 @@ extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl); extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); extern struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid); -extern bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, u32 slotid, u32 seq_nr); +extern int nfs4_slot_wait_on_seqid(struct nfs4_slot_table *tbl, + u32 slotid, u32 seq_nr, + unsigned long timeout); extern bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); -- cgit v0.10.2 From 07e8dcbda71ef87e9cbdc42b5bb16a44c1ab839b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 28 Aug 2016 10:28:25 -0400 Subject: NFSv4.1: Defer bumping the slot sequence number until we free the slot For operations like OPEN or LAYOUTGET, which return recallable state (i.e. delegations and layouts) we want to enable the mechanism for resolving recall races in RFC5661 Section 2.10.6.3. To do so, we will want to defer bumping the slot's sequence number until we have finished processing the RPC results. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0cc0c31..de4a89d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -666,6 +666,11 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) tbl = slot->table; session = tbl->session; + /* Bump the slot sequence number */ + if (slot->seq_done) + slot->seq_nr++; + slot->seq_done = 0; + spin_lock(&tbl->slot_tbl_lock); /* Be nice to the server: try to ensure that the last transmitted * value for highest_user_slotid <= target_highest_slotid @@ -716,7 +721,7 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) switch (res->sr_status) { case 0: /* Update the slot's sequence and clientid lease timer */ - ++slot->seq_nr; + slot->seq_done = 1; clp = session->clp; do_renew_lease(clp, res->sr_timestamp); /* Check sequence flags */ @@ -771,7 +776,7 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) goto retry_nowait; default: /* Just update the slot sequence no. */ - ++slot->seq_nr; + slot->seq_done = 1; } out: /* The session may be reset by one of the error handlers. */ diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index fa75d7d..f703b75 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -21,7 +21,8 @@ struct nfs4_slot { unsigned long generation; u32 slot_nr; u32 seq_nr; - unsigned int interrupted : 1; + unsigned int interrupted : 1, + seq_done : 1; }; /* Sessions */ -- cgit v0.10.2 From 2e80dbe7ac51a911e8a828407b1a48c5ba938cd2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 28 Aug 2016 11:50:26 -0400 Subject: NFSv4.1: Close callback races for OPEN, LAYOUTGET and LAYOUTRETURN Defer freeing the slot until after we have processed the results from OPEN and LAYOUTGET. This means that the server can rely on the mechanism in RFC5661 Section 2.10.6.3 to ensure that replies to an OPEN or LAYOUTGET/RETURN RPC call don't race with the callbacks that apply to them. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index de4a89d..f5aecaa 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -634,15 +634,11 @@ out_sleep: } EXPORT_SYMBOL_GPL(nfs40_setup_sequence); -static int nfs40_sequence_done(struct rpc_task *task, - struct nfs4_sequence_res *res) +static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res) { struct nfs4_slot *slot = res->sr_slot; struct nfs4_slot_table *tbl; - if (slot == NULL) - goto out; - tbl = slot->table; spin_lock(&tbl->slot_tbl_lock); if (!nfs41_wake_and_assign_slot(tbl, slot)) @@ -650,7 +646,13 @@ static int nfs40_sequence_done(struct rpc_task *task, spin_unlock(&tbl->slot_tbl_lock); res->sr_slot = NULL; -out: +} + +static int nfs40_sequence_done(struct rpc_task *task, + struct nfs4_sequence_res *res) +{ + if (res->sr_slot != NULL) + nfs40_sequence_free_slot(res); return 1; } @@ -695,7 +697,8 @@ out_unlock: wake_up_all(&tbl->slot_waitq); } -int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) +static int nfs41_sequence_process(struct rpc_task *task, + struct nfs4_sequence_res *res) { struct nfs4_session *session; struct nfs4_slot *slot = res->sr_slot; @@ -781,11 +784,11 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) out: /* The session may be reset by one of the error handlers. */ dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); - nfs41_sequence_free_slot(res); out_noaction: return ret; retry_nowait: if (rpc_restart_call_prepare(task)) { + nfs41_sequence_free_slot(res); task->tk_status = 0; ret = 0; } @@ -796,8 +799,37 @@ out_retry: rpc_delay(task, NFS4_POLL_RETRY_MAX); return 0; } + +int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) +{ + if (!nfs41_sequence_process(task, res)) + return 0; + if (res->sr_slot != NULL) + nfs41_sequence_free_slot(res); + return 1; + +} EXPORT_SYMBOL_GPL(nfs41_sequence_done); +static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) +{ + if (res->sr_slot == NULL) + return 1; + if (res->sr_slot->table->session != NULL) + return nfs41_sequence_process(task, res); + return nfs40_sequence_done(task, res); +} + +static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res) +{ + if (res->sr_slot != NULL) { + if (res->sr_slot->table->session != NULL) + nfs41_sequence_free_slot(res); + else + nfs40_sequence_free_slot(res); + } +} + int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { if (res->sr_slot == NULL) @@ -927,6 +959,17 @@ static int nfs4_setup_sequence(const struct nfs_server *server, args, res, task); } +static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) +{ + return nfs40_sequence_done(task, res); +} + +static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res) +{ + if (res->sr_slot != NULL) + nfs40_sequence_free_slot(res); +} + int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { @@ -1204,6 +1247,7 @@ static void nfs4_opendata_free(struct kref *kref) struct super_block *sb = p->dentry->d_sb; nfs_free_seqid(p->o_arg.seqid); + nfs4_sequence_free_slot(&p->o_res.seq_res); if (p->state != NULL) nfs4_put_open_state(p->state); nfs4_put_state_owner(p->owner); @@ -1663,9 +1707,14 @@ err: static struct nfs4_state * nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) { + struct nfs4_state *ret; + if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) - return _nfs4_opendata_reclaim_to_nfs4_state(data); - return _nfs4_opendata_to_nfs4_state(data); + ret =_nfs4_opendata_reclaim_to_nfs4_state(data); + else + ret = _nfs4_opendata_to_nfs4_state(data); + nfs4_sequence_free_slot(&data->o_res.seq_res); + return ret; } static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) @@ -2063,7 +2112,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) data->rpc_status = task->tk_status; - if (!nfs4_sequence_done(task, &data->o_res.seq_res)) + if (!nfs4_sequence_process(task, &data->o_res.seq_res)) return; if (task->tk_status == 0) { @@ -7871,7 +7920,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) struct nfs4_layoutget *lgp = calldata; dprintk("--> %s\n", __func__); - nfs41_sequence_done(task, &lgp->res.seq_res); + nfs41_sequence_process(task, &lgp->res.seq_res); dprintk("<-- %s\n", __func__); } @@ -8087,6 +8136,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ if (status == 0 && lgp->res.layoutp->len) lseg = pnfs_layout_process(lgp); + nfs4_sequence_free_slot(&lgp->res.seq_res); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); if (status) @@ -8113,7 +8163,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) dprintk("--> %s\n", __func__); - if (!nfs41_sequence_done(task, &lrp->res.seq_res)) + if (!nfs41_sequence_process(task, &lrp->res.seq_res)) return; server = NFS_SERVER(lrp->args.inode); @@ -8125,6 +8175,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) case -NFS4ERR_DELAY: if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN) break; + nfs4_sequence_free_slot(&lrp->res.seq_res); rpc_restart_call_prepare(task); return; } @@ -8145,6 +8196,7 @@ static void nfs4_layoutreturn_release(void *calldata) pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); pnfs_clear_layoutreturn_waitbit(lo); spin_unlock(&lo->plh_inode->i_lock); + nfs4_sequence_free_slot(&lrp->res.seq_res); pnfs_free_lseg_list(&freeme); pnfs_put_layout_hdr(lrp->args.layout); nfs_iput_and_deactive(lrp->inode); -- cgit v0.10.2 From d138027a8256a3e9d7657c8d0dae84c08ef2cfe1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 28 Aug 2016 12:19:04 -0400 Subject: NFSv4.1: Remove obsolete and incorrrect assignment in nfs4_callback_sequence Signed-off-by: Trond Myklebust diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index c359329..f953ef6 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -484,7 +484,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, goto out; tbl = &clp->cl_session->bc_slot_table; - slot = tbl->slots + args->csa_slotid; /* Set up res before grabbing the spinlock */ memcpy(&res->csr_sessionid, &args->csa_sessionid, -- cgit v0.10.2 From add1fa75101263ab4d74240f93000998d4325624 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Sat, 27 Aug 2016 01:02:28 +0200 Subject: drm/atomic: Don't potentially reset color_mgmt_changed on successive property updates. Due to assigning the 'replaced' value instead of or'ing it, if drm_atomic_crtc_set_property() gets called multiple times, the last call will define the color_mgmt_changed flag, so a non-updating call to a property can reset the flag and prevent actual hw state updates required by preceding property updates. Signed-off-by: Mario Kleiner Cc: Daniel Vetter Cc: # v4.6+ Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index fa39307..2a3ded4 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -475,7 +475,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc, val, -1, &replaced); - state->color_mgmt_changed = replaced; + state->color_mgmt_changed |= replaced; return ret; } else if (property == config->ctm_property) { ret = drm_atomic_replace_property_blob_from_id(crtc, @@ -483,7 +483,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc, val, sizeof(struct drm_color_ctm), &replaced); - state->color_mgmt_changed = replaced; + state->color_mgmt_changed |= replaced; return ret; } else if (property == config->gamma_lut_property) { ret = drm_atomic_replace_property_blob_from_id(crtc, @@ -491,7 +491,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc, val, -1, &replaced); - state->color_mgmt_changed = replaced; + state->color_mgmt_changed |= replaced; return ret; } else if (crtc->funcs->atomic_set_property) return crtc->funcs->atomic_set_property(crtc, state, property, val); -- cgit v0.10.2 From 3eab887a55424fc2c27553b7bfe32330df83f7b8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Aug 2016 15:04:33 -0700 Subject: Linux 4.8-rc4 diff --git a/Makefile b/Makefile index 3537aa2..67f42d5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* -- cgit v0.10.2 From 17de0a9ff3df8f54f2f47746d118112d4e61d973 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Aug 2016 11:33:58 +1000 Subject: iomap: don't set FIEMAP_EXTENT_MERGED for extent based filesystems Filesystems like XFS that use extents should not set the FIEMAP_EXTENT_MERGED flag in the fiemap extent structures. To allow for both behaviors for the upcoming gfs2 usage split the iomap type field into type and flags, and only set FIEMAP_EXTENT_MERGED if the IOMAP_F_MERGED flag is set. The flags field will also come in handy for future features such as shared extents on reflink-enabled file systems. Reported-by: Andreas Gruenbacher Signed-off-by: Christoph Hellwig Acked-by: Darrick J. Wong Signed-off-by: Dave Chinner diff --git a/fs/iomap.c b/fs/iomap.c index 0342254..706270f 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -428,9 +428,12 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi, break; } + if (iomap->flags & IOMAP_F_MERGED) + flags |= FIEMAP_EXTENT_MERGED; + return fiemap_fill_next_extent(fi, iomap->offset, iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0, - iomap->length, flags | FIEMAP_EXTENT_MERGED); + iomap->length, flags); } diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 3267df4..3d70ece 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -19,6 +19,11 @@ struct vm_fault; #define IOMAP_UNWRITTEN 0x04 /* blocks allocated @blkno in unwritten state */ /* + * Flags for iomap mappings: + */ +#define IOMAP_F_MERGED 0x01 /* contains multiple blocks/extents */ + +/* * Magic value for blkno: */ #define IOMAP_NULL_BLOCK -1LL /* blkno is not valid */ @@ -27,7 +32,8 @@ struct iomap { sector_t blkno; /* 1st sector of mapping, 512b units */ loff_t offset; /* file offset of mapping, bytes */ u64 length; /* length of mapping, bytes */ - int type; /* type of mapping */ + u16 type; /* type of mapping */ + u16 flags; /* flags for mapping */ struct block_device *bdev; /* block device for I/O */ }; -- cgit v0.10.2 From cc7786d3ee7e3c979799db834b528db2c0834c2e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 25 Jul 2016 14:26:51 +1000 Subject: powerpc/tm: do not use r13 for tabort_syscall tabort_syscall runs with RI=1, so a nested recoverable machine check will load the paca into r13 and overwrite what we loaded it with, because exceptions returning to privileged mode do not restore r13. Fixes: b4b56f9ecab4 (powerpc/tm: Abort syscalls in active transactions) Cc: stable@vger.kernel.org Signed-off-by: Nick Piggin Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 6b8bc0d..5afd03e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -368,13 +368,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) tabort_syscall: /* Firstly we need to enable TM in the kernel */ mfmsr r10 - li r13, 1 - rldimi r10, r13, MSR_TM_LG, 63-MSR_TM_LG + li r9, 1 + rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG mtmsrd r10, 0 /* tabort, this dooms the transaction, nothing else */ - li r13, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) - TABORT(R13) + li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) + TABORT(R9) /* * Return directly to userspace. We have corrupted user register state, @@ -382,8 +382,8 @@ tabort_syscall: * resume after the tbegin of the aborted transaction with the * checkpointed register state. */ - li r13, MSR_RI - andc r10, r10, r13 + li r9, MSR_RI + andc r10, r10, r9 mtmsrd r10, 1 mtspr SPRN_SRR0, r11 mtspr SPRN_SRR1, r12 -- cgit v0.10.2 From a9cbf0b2195b695cbeeeecaa4e2770948c212e9a Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Mon, 22 Aug 2016 12:17:44 +0530 Subject: powerpc/powernv : Drop reference added by kset_find_obj() In a situation, where Linux kernel gets notified about duplicate error log from OPAL, it is been observed that kernel fails to remove sysfs entries (/sys/firmware/opal/elog/0xXXXXXXXX) of such error logs. This is because, we currently search the error log/dump kobject in the kset list via 'kset_find_obj()' routine. Which eventually increment the reference count by one, once it founds the kobject. So, unless we decrement the reference count by one after it found the kobject, we would not be able to release the kobject properly later. This patch adds the 'kobject_put()' which was missing earlier. Signed-off-by: Mukesh Ojha Cc: stable@vger.kernel.org Reviewed-by: Vasant Hegde Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 2ee9643..4c82782 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -370,6 +370,7 @@ static irqreturn_t process_dump(int irq, void *data) uint32_t dump_id, dump_size, dump_type; struct dump_obj *dump; char name[22]; + struct kobject *kobj; rc = dump_read_info(&dump_id, &dump_size, &dump_type); if (rc != OPAL_SUCCESS) @@ -381,8 +382,12 @@ static irqreturn_t process_dump(int irq, void *data) * that gracefully and not create two conflicting * entries. */ - if (kset_find_obj(dump_kset, name)) + kobj = kset_find_obj(dump_kset, name); + if (kobj) { + /* Drop reference added by kset_find_obj() */ + kobject_put(kobj); return 0; + } dump = create_dump_obj(dump_id, dump_size, dump_type); if (!dump) diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 37f959b..f2344cb 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -247,6 +247,7 @@ static irqreturn_t elog_event(int irq, void *data) uint64_t elog_type; int rc; char name[2+16+1]; + struct kobject *kobj; rc = opal_get_elog_size(&id, &size, &type); if (rc != OPAL_SUCCESS) { @@ -269,8 +270,12 @@ static irqreturn_t elog_event(int irq, void *data) * that gracefully and not create two conflicting * entries. */ - if (kset_find_obj(elog_kset, name)) + kobj = kset_find_obj(elog_kset, name); + if (kobj) { + /* Drop reference added by kset_find_obj() */ + kobject_put(kobj); return IRQ_HANDLED; + } create_elog_obj(log_id, elog_size, elog_type); -- cgit v0.10.2 From 78a3e8889b4b6b99775ed954696ff3e017f5d19b Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Tue, 23 Aug 2016 10:46:17 +1000 Subject: powerpc: signals: Discard transaction state from signal frames Userspace can begin and suspend a transaction within the signal handler which means they might enter sys_rt_sigreturn() with the processor in suspended state. sys_rt_sigreturn() wants to restore process context (which may have been in a transaction before signal delivery). To do this it must restore TM SPRS. To achieve this, any transaction initiated within the signal frame must be discarded in order to be able to restore TM SPRs as TM SPRs can only be manipulated non-transactionally.. >From the PowerPC ISA: TM Bad Thing Exception [Category: Transactional Memory] An attempt is made to execute a mtspr targeting a TM register in other than Non-transactional state. Not doing so results in a TM Bad Thing: [12045.221359] Kernel BUG at c000000000050a40 [verbose debug info unavailable] [12045.221470] Unexpected TM Bad Thing exception at c000000000050a40 (msr 0x201033) [12045.221540] Oops: Unrecoverable exception, sig: 6 [#1] [12045.221586] SMP NR_CPUS=2048 NUMA PowerNV [12045.221634] Modules linked in: xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ip_tables x_tables kvm_hv kvm uio_pdrv_genirq ipmi_powernv uio powernv_rng ipmi_msghandler autofs4 ses enclosure scsi_transport_sas bnx2x ipr mdio libcrc32c [12045.222167] CPU: 68 PID: 6178 Comm: sigreturnpanic Not tainted 4.7.0 #34 [12045.222224] task: c0000000fce38600 ti: c0000000fceb4000 task.ti: c0000000fceb4000 [12045.222293] NIP: c000000000050a40 LR: c0000000000163bc CTR: 0000000000000000 [12045.222361] REGS: c0000000fceb7ac0 TRAP: 0700 Not tainted (4.7.0) [12045.222418] MSR: 9000000300201033 CR: 28444280 XER: 20000000 [12045.222625] CFAR: c0000000000163b8 SOFTE: 0 PACATMSCRATCH: 900000014280f033 GPR00: 01100000b8000001 c0000000fceb7d40 c00000000139c100 c0000000fce390d0 GPR04: 900000034280f033 0000000000000000 0000000000000000 0000000000000000 GPR08: 0000000000000000 b000000000001033 0000000000000001 0000000000000000 GPR12: 0000000000000000 c000000002926400 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR24: 0000000000000000 00003ffff98cadd0 00003ffff98cb470 0000000000000000 GPR28: 900000034280f033 c0000000fceb7ea0 0000000000000001 c0000000fce390d0 [12045.223535] NIP [c000000000050a40] tm_restore_sprs+0xc/0x1c [12045.223584] LR [c0000000000163bc] tm_recheckpoint+0x5c/0xa0 [12045.223630] Call Trace: [12045.223655] [c0000000fceb7d80] [c000000000026e74] sys_rt_sigreturn+0x494/0x6c0 [12045.223738] [c0000000fceb7e30] [c0000000000092e0] system_call+0x38/0x108 [12045.223806] Instruction dump: [12045.223841] 7c800164 4e800020 7c0022a6 f80304a8 7c0222a6 f80304b0 7c0122a6 f80304b8 [12045.223955] 4e800020 e80304a8 7c0023a6 e80304b0 <7c0223a6> e80304b8 7c0123a6 4e800020 [12045.224074] ---[ end trace cb8002ee240bae76 ]--- It isn't clear exactly if there is really a use case for userspace returning with a suspended transaction, however, doing so doesn't (on its own) constitute a bad frame. As such, this patch simply discards the transactional state of the context calling the sigreturn and continues. Reported-by: Laurent Dufour Signed-off-by: Cyril Bur Tested-by: Laurent Dufour Reviewed-by: Laurent Dufour Acked-by: Simon Guo Signed-off-by: Benjamin Herrenschmidt diff --git a/Documentation/powerpc/transactional_memory.txt b/Documentation/powerpc/transactional_memory.txt index ba0a2a4..e32fdbb 100644 --- a/Documentation/powerpc/transactional_memory.txt +++ b/Documentation/powerpc/transactional_memory.txt @@ -167,6 +167,8 @@ signal will be rolled back anyway. For signals taken in non-TM or suspended mode, we use the normal/non-checkpointed stack pointer. +Any transaction initiated inside a sighandler and suspended on return +from the sighandler to the kernel will get reclaimed and discarded. Failure cause codes used by kernel ================================== diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index b6aa378..a7daf74 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1226,7 +1226,21 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, (regs->gpr[1] + __SIGNAL_FRAMESIZE + 16); if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf))) goto bad; + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * If there is a transactional state then throw it away. + * The purpose of a sigreturn is to destroy all traces of the + * signal frame, this includes any transactional state created + * within in. We only check for suspended as we can never be + * active in the kernel, we are active, there is nothing better to + * do than go ahead and Bad Thing later. + * The cause is not important as there will never be a + * recheckpoint so it's not user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); + if (__get_user(tmp, &rt_sf->uc.uc_link)) goto bad; uc_transact = (struct ucontext __user *)(uintptr_t)tmp; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 7e49984..70409bb 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -676,7 +676,21 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) goto badframe; set_current_blocked(&set); + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * If there is a transactional state then throw it away. + * The purpose of a sigreturn is to destroy all traces of the + * signal frame, this includes any transactional state created + * within in. We only check for suspended as we can never be + * active in the kernel, we are active, there is nothing better to + * do than go ahead and Bad Thing later. + * The cause is not important as there will never be a + * recheckpoint so it's not user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); + if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) goto badframe; if (MSR_TM_ACTIVE(msr)) { -- cgit v0.10.2 From fe4c988bdd1cc60402a4e3ca3976a686ea991b5a Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 29 Aug 2016 01:13:42 +0300 Subject: net/mlx5e: Limit UMR length to the device's limitation ConnectX-4 UMR (User Memory Region) MTT translation table offset in WQE is limited to U16_MAX, before this patch we ignored that limitation and requested the maximum possible UMR translation length that the netdev might need (MAX channels * MAX pages per channel). In case of a system with #cores > 32 and when linear WQE allocation fails, falling back to using UMR WQEs will cause the RQ (Receive Queue) to get stuck. Here we limit UMR length to min(U16_MAX, max required pages) (while considering the required alignments) on driver load, by default U16_MAX is sufficient since the default RX rings value guarantees that we are in range, dynamically (on set_ringparam/set_channels) we will check if the new required UMR length (num mtts) is still in range, if not, fail the request. Fixes: bc77b240b3c5 ('net/mlx5e: Add fragmented memory support for RX multi packet WQE') Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 1b495ef..d63a1b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -73,8 +73,12 @@ #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) #define MLX5_MPWRQ_STRIDES_PER_PAGE (MLX5_MPWRQ_NUM_STRIDES >> \ MLX5_MPWRQ_WQE_PAGE_ORDER) -#define MLX5_CHANNEL_MAX_NUM_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8) * \ - BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW)) + +#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2) +#define MLX5E_REQUIRED_MTTS(rqs, wqes)\ + (rqs * wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8)) +#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) <= U16_MAX) + #define MLX5_UMR_ALIGN (2048) #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (128) @@ -304,6 +308,7 @@ struct mlx5e_rq { unsigned long state; int ix; + u32 mpwqe_mtt_offset; struct mlx5e_rx_am am; /* Adaptive Moderation */ @@ -814,11 +819,6 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) MLX5E_MAX_NUM_CHANNELS); } -static inline int mlx5e_get_mtt_octw(int npages) -{ - return ALIGN(npages, 8) / 2; -} - extern const struct ethtool_ops mlx5e_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 4a3757e..9cfe408 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -373,6 +373,7 @@ static int mlx5e_set_ringparam(struct net_device *dev, u16 min_rx_wqes; u8 log_rq_size; u8 log_sq_size; + u32 num_mtts; int err = 0; if (param->rx_jumbo_pending) { @@ -397,6 +398,15 @@ static int mlx5e_set_ringparam(struct net_device *dev, 1 << mlx5_max_log_rq_size(rq_wq_type)); return -EINVAL; } + + num_mtts = MLX5E_REQUIRED_MTTS(priv->params.num_channels, param->rx_pending); + if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + !MLX5E_VALID_NUM_MTTS(num_mtts)) { + netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n", + __func__, param->rx_pending); + return -EINVAL; + } + if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { netdev_info(dev, "%s: tx_pending (%d) < min (%d)\n", __func__, param->tx_pending, @@ -454,6 +464,7 @@ static int mlx5e_set_channels(struct net_device *dev, unsigned int count = ch->combined_count; bool arfs_enabled; bool was_opened; + u32 num_mtts; int err = 0; if (!count) { @@ -472,6 +483,14 @@ static int mlx5e_set_channels(struct net_device *dev, return -EINVAL; } + num_mtts = MLX5E_REQUIRED_MTTS(count, BIT(priv->params.log_rq_size)); + if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + !MLX5E_VALID_NUM_MTTS(num_mtts)) { + netdev_info(dev, "%s: rx count (%d) request can't be satisfied, try to reduce.\n", + __func__, count); + return -EINVAL; + } + if (priv->params.num_channels == count) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 03d944c..65360b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -340,6 +340,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; + rq->mpwqe_mtt_offset = c->ix * + MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size)); + rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; @@ -3233,8 +3236,8 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) struct mlx5_create_mkey_mbox_in *in; struct mlx5_mkey_seg *mkc; int inlen = sizeof(*in); - u64 npages = - priv->profile->max_nch(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS; + u64 npages = MLX5E_REQUIRED_MTTS(priv->profile->max_nch(mdev), + BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW)); int err; in = mlx5_vzalloc(inlen); @@ -3248,10 +3251,12 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) MLX5_PERM_LOCAL_WRITE | MLX5_ACCESS_MODE_MTT; + npages = min_t(u32, ALIGN(U16_MAX, 4) * 2, npages); + mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); mkc->flags_pd = cpu_to_be32(mdev->mlx5e_res.pdn); mkc->len = cpu_to_be64(npages << PAGE_SHIFT); - mkc->xlt_oct_size = cpu_to_be32(mlx5e_get_mtt_octw(npages)); + mkc->xlt_oct_size = cpu_to_be32(MLX5_MTT_OCTW(npages)); mkc->log2_page_size = PAGE_SHIFT; err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen, NULL, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 9f2a16a..bdc9e33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -324,9 +324,9 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev, } } -static u16 mlx5e_get_wqe_mtt_offset(u16 rq_ix, u16 wqe_ix) +static u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) { - return rq_ix * MLX5_CHANNEL_MAX_NUM_MTTS + + return rq->mpwqe_mtt_offset + wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); } @@ -340,7 +340,7 @@ static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5_wqe_data_seg *dseg = &wqe->data; struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); - u16 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix); + u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); memset(wqe, 0, sizeof(*wqe)); cseg->opmod_idx_opcode = @@ -353,9 +353,9 @@ static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; ucseg->klm_octowords = - cpu_to_be16(mlx5e_get_mtt_octw(MLX5_MPWRQ_PAGES_PER_WQE)); + cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); ucseg->bsf_octowords = - cpu_to_be16(mlx5e_get_mtt_octw(umr_wqe_mtt_offset)); + cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset)); ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); dseg->lkey = sq->mkey_be; @@ -423,7 +423,7 @@ static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq, { struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; int mtt_sz = mlx5e_get_wqe_mtt_sz(); - u32 dma_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix) << PAGE_SHIFT; + u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT; int i; wi->umr.dma_info = kmalloc(sizeof(*wi->umr.dma_info) * -- cgit v0.10.2 From f2fde18c52a7367a8f6cf6855e2a7174e601c8ee Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 29 Aug 2016 01:13:43 +0300 Subject: net/mlx5e: Don't wait for RQ completions on close This will significantly reduce receive queue flush time on interface down. Instead of asking the firmware to flush the RQ (Receive Queue) via asynchronous completions when moved to error, we handle RQ flush manually (mlx5e_free_rx_descs) same as we did when RQ flush got timed out. This will reduce RQs flush time and speedup interface down procedure (ifconfig down) from 6 sec to 0.3 sec on a 48 cores system. Moved mlx5e_free_rx_descs en_main.c where it is needed, to keep en_rx.c free form non critical data path code for better code locality. Fixes: 6cd392a082de ('net/mlx5e: Handle RQ flush in error cases') Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index d63a1b8..26a7ec7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -223,9 +223,8 @@ struct mlx5e_tstamp { }; enum { - MLX5E_RQ_STATE_POST_WQES_ENABLE, + MLX5E_RQ_STATE_FLUSH, MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, - MLX5E_RQ_STATE_FLUSH_TIMEOUT, MLX5E_RQ_STATE_AM, }; @@ -703,7 +702,6 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); void mlx5e_free_tx_descs(struct mlx5e_sq *sq); -void mlx5e_free_rx_descs(struct mlx5e_rq *rq); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 65360b1..2463eba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -431,7 +431,6 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); - MLX5_SET(rqc, rqc, flush_in_error_en, 1); MLX5_SET(rqc, rqc, vsd, priv->params.vlan_strip_disable); MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); @@ -528,6 +527,23 @@ static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) return -ETIMEDOUT; } +static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->wq; + struct mlx5e_rx_wqe *wqe; + __be16 wqe_ix_be; + u16 wqe_ix; + + while (!mlx5_wq_ll_is_empty(wq)) { + wqe_ix_be = *wq->tail_next; + wqe_ix = be16_to_cpu(wqe_ix_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix); + rq->dealloc_wqe(rq, wqe_ix); + mlx5_wq_ll_pop(&rq->wq, wqe_ix_be, + &wqe->next.next_wqe_index); + } +} + static int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) @@ -551,8 +567,6 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (param->am_enabled) set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; sq->ico_wqe_info[pi].num_wqebbs = 1; mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */ @@ -569,23 +583,8 @@ err_destroy_rq: static void mlx5e_close_rq(struct mlx5e_rq *rq) { - int tout = 0; - int err; - - clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); + set_bit(MLX5E_RQ_STATE_FLUSH, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ - - err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); - while (!mlx5_wq_ll_is_empty(&rq->wq) && !err && - tout++ < MLX5_EN_QP_FLUSH_MAX_ITER) - msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT); - - if (err || tout == MLX5_EN_QP_FLUSH_MAX_ITER) - set_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state); - - /* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */ - napi_synchronize(&rq->channel->napi); - cancel_work_sync(&rq->am.work); mlx5e_disable_rq(rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index bdc9e33..fee1e47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -595,26 +595,9 @@ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) wi->free_wqe(rq, wi); } -void mlx5e_free_rx_descs(struct mlx5e_rq *rq) -{ - struct mlx5_wq_ll *wq = &rq->wq; - struct mlx5e_rx_wqe *wqe; - __be16 wqe_ix_be; - u16 wqe_ix; - - while (!mlx5_wq_ll_is_empty(wq)) { - wqe_ix_be = *wq->tail_next; - wqe_ix = be16_to_cpu(wqe_ix_be); - wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix); - rq->dealloc_wqe(rq, wqe_ix); - mlx5_wq_ll_pop(&rq->wq, wqe_ix_be, - &wqe->next.next_wqe_index); - } -} - #define RQ_CANNOT_POST(rq) \ - (!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state) || \ - test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) + (test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state) || \ + test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { @@ -916,7 +899,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); int work_done = 0; - if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state))) + if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) return 0; if (cq->decmprs_left) -- cgit v0.10.2 From 8484f9ed13b26043be80ff5774506024956eae8f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 29 Aug 2016 01:13:44 +0300 Subject: net/mlx5e: Don't post fragmented MPWQE when RQ is disabled ICO (Internal control operations) SQ (Send Queue) is closed/disabled after RQ (Receive Queue). After RQ is closed an ICO SQ completion might post a fragmented MPWQE (Multi Packet Work Queue Element) into that RQ. As on regular RQ post, check if we are allowed to post to that RQ (RQ is enabled). Cleanup in-progress UMR MPWQE on mlx5e_free_rx_descs if needed. Fixes: bc77b240b3c5 ('net/mlx5e: Add fragmented memory support for RX multi packet WQE') Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2463eba..e259eaa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -534,6 +534,10 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) __be16 wqe_ix_be; u16 wqe_ix; + /* UMR WQE (if in progress) is always at wq->head */ + if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) + mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]); + while (!mlx5_wq_ll_is_empty(wq)) { wqe_ix_be = *wq->tail_next; wqe_ix = be16_to_cpu(wqe_ix_be); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index fee1e47..b6f8ebb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -506,6 +506,12 @@ void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + + if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) { + mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]); + return; + } + mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); rq->stats.mpwqe_frag++; -- cgit v0.10.2 From 6e8dd6d6f4bd2fd6fefdbf2e73bf251e36db59af Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 29 Aug 2016 01:13:45 +0300 Subject: net/mlx5e: Don't wait for SQ completions on close Instead of asking the firmware to flush the SQ (Send Queue) via asynchronous completions when moved to error, we handle SQ flush manually (mlx5e_free_tx_descs) same as we did when SQ flush got timed out or on tx_timeout. This will reduce SQs flush time and speedup interface down procedure. Moved mlx5e_free_tx_descs to the end of en_tx.c for tx critical code locality. Fixes: 29429f3300a3 ('net/mlx5e: Timeout if SQ doesn't flush during close') Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 26a7ec7..bf722aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -369,9 +369,8 @@ struct mlx5e_sq_dma { }; enum { - MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, + MLX5E_SQ_STATE_FLUSH, MLX5E_SQ_STATE_BF_ENABLE, - MLX5E_SQ_STATE_TX_TIMEOUT, }; struct mlx5e_ico_wqe_info { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e259eaa..297781a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -39,13 +39,6 @@ #include "eswitch.h" #include "vxlan.h" -enum { - MLX5_EN_QP_FLUSH_TIMEOUT_MS = 5000, - MLX5_EN_QP_FLUSH_MSLEEP_QUANT = 20, - MLX5_EN_QP_FLUSH_MAX_ITER = MLX5_EN_QP_FLUSH_TIMEOUT_MS / - MLX5_EN_QP_FLUSH_MSLEEP_QUANT, -}; - struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; struct mlx5_wq_param wq; @@ -827,7 +820,6 @@ static int mlx5e_open_sq(struct mlx5e_channel *c, goto err_disable_sq; if (sq->txq) { - set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); netdev_tx_reset_queue(sq->txq); netif_tx_start_queue(sq->txq); } @@ -851,38 +843,20 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq) static void mlx5e_close_sq(struct mlx5e_sq *sq) { - int tout = 0; - int err; + set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state); + /* prevent netif_tx_wake_queue */ + napi_synchronize(&sq->channel->napi); if (sq->txq) { - clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); - /* prevent netif_tx_wake_queue */ - napi_synchronize(&sq->channel->napi); netif_tx_disable_queue(sq->txq); - /* ensure hw is notified of all pending wqes */ + /* last doorbell out, godspeed .. */ if (mlx5e_sq_has_room_for(sq, 1)) mlx5e_send_nop(sq, true); - - err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, - MLX5_SQC_STATE_ERR, false, 0); - if (err) - set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state); } - /* wait till sq is empty, unless a TX timeout occurred on this SQ */ - while (sq->cc != sq->pc && - !test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)) { - msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT); - if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER) - set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state); - } - - /* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */ - napi_synchronize(&sq->channel->napi); - - mlx5e_free_tx_descs(sq); mlx5e_disable_sq(sq); + mlx5e_free_tx_descs(sq); mlx5e_destroy_sq(sq); } @@ -2802,7 +2776,7 @@ static void mlx5e_tx_timeout(struct net_device *dev) if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i))) continue; sched_work = true; - set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state); + set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state); netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n", i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index e073bf59..5f209ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -394,35 +394,6 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) return mlx5e_sq_xmit(sq, skb); } -void mlx5e_free_tx_descs(struct mlx5e_sq *sq) -{ - struct mlx5e_tx_wqe_info *wi; - struct sk_buff *skb; - u16 ci; - int i; - - while (sq->cc != sq->pc) { - ci = sq->cc & sq->wq.sz_m1; - skb = sq->skb[ci]; - wi = &sq->wqe_info[ci]; - - if (!skb) { /* nop */ - sq->cc++; - continue; - } - - for (i = 0; i < wi->num_dma; i++) { - struct mlx5e_sq_dma *dma = - mlx5e_dma_get(sq, sq->dma_fifo_cc++); - - mlx5e_tx_dma_unmap(sq->pdev, dma); - } - - dev_kfree_skb_any(skb); - sq->cc += wi->num_wqebbs; - } -} - bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_sq *sq; @@ -434,7 +405,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sq = container_of(cq, struct mlx5e_sq, cq); - if (unlikely(test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state))) + if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) return false; npkts = 0; @@ -512,11 +483,39 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) netdev_tx_completed_queue(sq->txq, npkts, nbytes); if (netif_tx_queue_stopped(sq->txq) && - mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM) && - likely(test_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state))) { - netif_tx_wake_queue(sq->txq); - sq->stats.wake++; + mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM)) { + netif_tx_wake_queue(sq->txq); + sq->stats.wake++; } return (i == MLX5E_TX_CQ_POLL_BUDGET); } + +void mlx5e_free_tx_descs(struct mlx5e_sq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + struct sk_buff *skb; + u16 ci; + int i; + + while (sq->cc != sq->pc) { + ci = sq->cc & sq->wq.sz_m1; + skb = sq->skb[ci]; + wi = &sq->wqe_info[ci]; + + if (!skb) { /* nop */ + sq->cc++; + continue; + } + + for (i = 0; i < wi->num_dma; i++) { + struct mlx5e_sq_dma *dma = + mlx5e_dma_get(sq, sq->dma_fifo_cc++); + + mlx5e_tx_dma_unmap(sq->pdev, dma); + } + + dev_kfree_skb_any(skb); + sq->cc += wi->num_wqebbs; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 64ae2e8..9bf33bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -51,16 +51,18 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq) static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) { + struct mlx5e_sq *sq = container_of(cq, struct mlx5e_sq, cq); struct mlx5_wq_cyc *wq; struct mlx5_cqe64 *cqe; - struct mlx5e_sq *sq; u16 sqcc; + if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + return; + cqe = mlx5e_get_cqe(cq); if (likely(!cqe)) return; - sq = container_of(cq, struct mlx5e_sq, cq); wq = &sq->wq; /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), -- cgit v0.10.2 From cc8e9ebf952699cb6870f1366a4920d05b036e31 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 29 Aug 2016 01:13:46 +0300 Subject: net/mlx5e: Fix ethtool -g/G rx ring parameter report with striding RQ The driver RQ has two possible configurations: striding RQ and non-striding RQ. Until this patch, the driver always reported the number of hardware WQEs (ring descriptors). For non striding RQ configuration, this was OK since we have one WQE per pending packet For striding RQ, multiple packets can fit into one WQE. For better user experience we normalize the rx_pending parameter (size of wqe/mtu) as the average ring size in case of striding RQ. Fixes: 461017cb006a ('net/mlx5e: Support RX multi-packet WQE ...') Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 9cfe408..d0cf8fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -352,15 +352,61 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, sq_stats_desc, j); } +static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type, + int num_wqe) +{ + int packets_per_wqe; + int stride_size; + int num_strides; + int wqe_size; + + if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + return num_wqe; + + stride_size = 1 << priv->params.mpwqe_log_stride_sz; + num_strides = 1 << priv->params.mpwqe_log_num_strides; + wqe_size = stride_size * num_strides; + + packets_per_wqe = wqe_size / + ALIGN(ETH_DATA_LEN, stride_size); + return (1 << (order_base_2(num_wqe * packets_per_wqe) - 1)); +} + +static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type, + int num_packets) +{ + int packets_per_wqe; + int stride_size; + int num_strides; + int wqe_size; + int num_wqes; + + if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + return num_packets; + + stride_size = 1 << priv->params.mpwqe_log_stride_sz; + num_strides = 1 << priv->params.mpwqe_log_num_strides; + wqe_size = stride_size * num_strides; + + num_packets = (1 << order_base_2(num_packets)); + + packets_per_wqe = wqe_size / + ALIGN(ETH_DATA_LEN, stride_size); + num_wqes = DIV_ROUND_UP(num_packets, packets_per_wqe); + return 1 << (order_base_2(num_wqes)); +} + static void mlx5e_get_ringparam(struct net_device *dev, struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); int rq_wq_type = priv->params.rq_wq_type; - param->rx_max_pending = 1 << mlx5_max_log_rq_size(rq_wq_type); + param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, + 1 << mlx5_max_log_rq_size(rq_wq_type)); param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; - param->rx_pending = 1 << priv->params.log_rq_size; + param->rx_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, + 1 << priv->params.log_rq_size); param->tx_pending = 1 << priv->params.log_sq_size; } @@ -370,6 +416,9 @@ static int mlx5e_set_ringparam(struct net_device *dev, struct mlx5e_priv *priv = netdev_priv(dev); bool was_opened; int rq_wq_type = priv->params.rq_wq_type; + u32 rx_pending_wqes; + u32 min_rq_size; + u32 max_rq_size; u16 min_rx_wqes; u8 log_rq_size; u8 log_sq_size; @@ -386,20 +435,29 @@ static int mlx5e_set_ringparam(struct net_device *dev, __func__); return -EINVAL; } - if (param->rx_pending < (1 << mlx5_min_log_rq_size(rq_wq_type))) { + + min_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, + 1 << mlx5_min_log_rq_size(rq_wq_type)); + max_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, + 1 << mlx5_max_log_rq_size(rq_wq_type)); + rx_pending_wqes = mlx5e_packets_to_rx_wqes(priv, rq_wq_type, + param->rx_pending); + + if (param->rx_pending < min_rq_size) { netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n", __func__, param->rx_pending, - 1 << mlx5_min_log_rq_size(rq_wq_type)); + min_rq_size); return -EINVAL; } - if (param->rx_pending > (1 << mlx5_max_log_rq_size(rq_wq_type))) { + if (param->rx_pending > max_rq_size) { netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n", __func__, param->rx_pending, - 1 << mlx5_max_log_rq_size(rq_wq_type)); + max_rq_size); return -EINVAL; } - num_mtts = MLX5E_REQUIRED_MTTS(priv->params.num_channels, param->rx_pending); + num_mtts = MLX5E_REQUIRED_MTTS(priv->params.num_channels, + rx_pending_wqes); if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && !MLX5E_VALID_NUM_MTTS(num_mtts)) { netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n", @@ -420,9 +478,9 @@ static int mlx5e_set_ringparam(struct net_device *dev, return -EINVAL; } - log_rq_size = order_base_2(param->rx_pending); + log_rq_size = order_base_2(rx_pending_wqes); log_sq_size = order_base_2(param->tx_pending); - min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, param->rx_pending); + min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, rx_pending_wqes); if (log_rq_size == priv->params.log_rq_size && log_sq_size == priv->params.log_sq_size && -- cgit v0.10.2 From c8cf78fe100b0d152a1932327c24cefc0ba4bdbe Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 29 Aug 2016 01:13:47 +0300 Subject: net/mlx5e: Add ethtool counter for TX xmit_more Add a counter in ethtool for the number of times that TX xmit_more was used. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 297781a..2459c7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -155,6 +155,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->tx_queue_stopped += sq_stats->stopped; s->tx_queue_wake += sq_stats->wake; s->tx_queue_dropped += sq_stats->dropped; + s->tx_xmit_more += sq_stats->xmit_more; s->tx_csum_partial_inner += sq_stats->csum_partial_inner; tx_offload_none += sq_stats->csum_none; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 7b9d8a9..499487c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -70,6 +70,7 @@ struct mlx5e_sw_stats { u64 tx_queue_stopped; u64 tx_queue_wake; u64 tx_queue_dropped; + u64 tx_xmit_more; u64 rx_wqe_err; u64 rx_mpwqe_filler; u64 rx_mpwqe_frag; @@ -101,6 +102,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) }, @@ -298,6 +300,7 @@ struct mlx5e_sq_stats { /* commonly accessed in data path */ u64 packets; u64 bytes; + u64 xmit_more; u64 tso_packets; u64 tso_bytes; u64 tso_inner_packets; @@ -324,6 +327,7 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, }; #define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 5f209ad..988eca9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -375,6 +375,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) sq->stats.packets++; sq->stats.bytes += num_bytes; + sq->stats.xmit_more += skb->xmit_more; return NETDEV_TX_OK; dma_unmap_wqe_err: -- cgit v0.10.2 From bf50082c15eb2bc47d1922e70f424c57f36646d5 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 29 Aug 2016 01:13:48 +0300 Subject: net/mlx5e: Fix memory leak if refreshing TIRs fails Free 'in' command object also when mlx5_core_modify_tir fails. Fixes: 724b2aa15126 ("net/mlx5e: TIRs management refactoring") Signed-off-by: Kamal Heib Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 673043c..9cce153 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -139,7 +139,7 @@ int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev) struct mlx5e_tir *tir; void *in; int inlen; - int err; + int err = 0; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = mlx5_vzalloc(inlen); @@ -151,10 +151,11 @@ int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev) list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen); if (err) - return err; + goto out; } +out: kvfree(in); - return 0; + return err; } -- cgit v0.10.2 From 1722b9694ecfbc602865017c3fa6da0e3ec234d8 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 29 Aug 2016 01:13:49 +0300 Subject: net/mlx5: Add error prints when validate ETS failed Upon set ETS failure due to user invalid input, add error prints to specify the exact error to the user. Fixes: cdcf11212b22 ('net/mlx5e: Validate BW weight values of ETS') Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index caa9a3c..762af16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -127,29 +127,40 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) return mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw); } -static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets) +static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, + struct ieee_ets *ets) { int bw_sum = 0; int i; /* Validate Priority */ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) + if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) { + netdev_err(netdev, + "Failed to validate ETS: priority value greater than max(%d)\n", + MLX5E_MAX_PRIORITY); return -EINVAL; + } } /* Validate Bandwidth Sum */ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { - if (!ets->tc_tx_bw[i]) + if (!ets->tc_tx_bw[i]) { + netdev_err(netdev, + "Failed to validate ETS: BW 0 is illegal\n"); return -EINVAL; + } bw_sum += ets->tc_tx_bw[i]; } } - if (bw_sum != 0 && bw_sum != 100) + if (bw_sum != 0 && bw_sum != 100) { + netdev_err(netdev, + "Failed to validate ETS: BW sum is illegal\n"); return -EINVAL; + } return 0; } @@ -159,7 +170,7 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); int err; - err = mlx5e_dbcnl_validate_ets(ets); + err = mlx5e_dbcnl_validate_ets(netdev, ets); if (err) return err; -- cgit v0.10.2 From e5835f2833b12808c53aa621d1d3aa085706b5b3 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 29 Aug 2016 01:13:50 +0300 Subject: net/mlx5: Increase number of ethtool steering priorities Ethtool has 11 flow tables, each flow table has its own priority. Increase the number of priorities to be aligned with the number of flow tables. Fixes: 1174fce8d141 ('net/mlx5e: Support l3/l4 flow type specs in ethtool flow steering') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 75bb8c8..3d6c1f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -80,7 +80,7 @@ LEFTOVERS_NUM_PRIOS) #define ETHTOOL_PRIO_NUM_LEVELS 1 -#define ETHTOOL_NUM_PRIOS 10 +#define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) /* Vlan, mac, ttc, aRFS */ #define KERNEL_NIC_PRIO_NUM_LEVELS 4 -- cgit v0.10.2 From 7d13eca09ed5e477f6ecfd97a35058762228b5e4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 27 Aug 2016 15:34:20 -0700 Subject: Documentation: networking: dsa: Remove platform device TODO Since commit 83c0afaec7b7 ("net: dsa: Add new binding implementation"), the shortcomings of the dsa platform device have been addressed, remove that TODO item. Signed-off-by: Florian Fainelli Acked-by: Andrew Lunn Signed-off-by: David S. Miller diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index 9d05ed7..f20c884 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -587,26 +587,6 @@ of DSA, would be the its port-based VLAN, used by the associated bridge device. TODO ==== -The platform device problem ---------------------------- -DSA is currently implemented as a platform device driver which is far from ideal -as was discussed in this thread: - -http://permalink.gmane.org/gmane.linux.network/329848 - -This basically prevents the device driver model to be properly used and applied, -and support non-MDIO, non-MMIO Ethernet connected switches. - -Another problem with the platform device driver approach is that it prevents the -use of a modular switch drivers build due to a circular dependency, illustrated -here: - -http://comments.gmane.org/gmane.linux.network/345803 - -Attempts of reworking this has been done here: - -https://lwn.net/Articles/643149/ - Making SWITCHDEV and DSA converge towards an unified codebase ------------------------------------------------------------- -- cgit v0.10.2 From 2fb04fdf30192ff1e2b5834e9b7745889ea8bbcb Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 27 Aug 2016 17:33:03 +0100 Subject: net: smc91x: fix SMC accesses Commit b70661c70830 ("net: smc91x: use run-time configuration on all ARM machines") broke some ARM platforms through several mistakes. Firstly, the access size must correspond to the following rule: (a) at least one of 16-bit or 8-bit access size must be supported (b) 32-bit accesses are optional, and may be enabled in addition to the above. Secondly, it provides no emulation of 16-bit accesses, instead blindly making 16-bit accesses even when the platform specifies that only 8-bit is supported. Reorganise smc91x.h so we can make use of the existing 16-bit access emulation already provided - if 16-bit accesses are supported, use 16-bit accesses directly, otherwise if 8-bit accesses are supported, use the provided 16-bit access emulation. If neither, BUG(). This exactly reflects the driver behaviour prior to the commit being fixed. Since the conversion incorrectly cut down the available access sizes on several platforms, we also need to go through every platform and fix up the overly-restrictive access size: Arnd assumed that if a platform can perform 32-bit, 16-bit and 8-bit accesses, then only a 32-bit access size needed to be specified - not so, all available access sizes must be specified. This likely fixes some performance regressions in doing this: if a platform does not support 8-bit accesses, 8-bit accesses have been emulated by performing a 16-bit read-modify-write access. Tested on the Intel Assabet/Neponset platform, which supports only 8-bit accesses, which was broken by the original commit. Fixes: b70661c70830 ("net: smc91x: use run-time configuration on all ARM machines") Signed-off-by: Russell King Tested-by: Robert Jarzmik Signed-off-by: David S. Miller diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c index c410d84..66070ac 100644 --- a/arch/arm/mach-pxa/idp.c +++ b/arch/arm/mach-pxa/idp.c @@ -83,7 +83,8 @@ static struct resource smc91x_resources[] = { }; static struct smc91x_platdata smc91x_platdata = { - .flags = SMC91X_USE_32BIT | SMC91X_USE_DMA | SMC91X_NOWAIT, + .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | + SMC91X_USE_DMA | SMC91X_NOWAIT, }; static struct platform_device smc91x_device = { diff --git a/arch/arm/mach-pxa/xcep.c b/arch/arm/mach-pxa/xcep.c index 3f06cd9..056369e 100644 --- a/arch/arm/mach-pxa/xcep.c +++ b/arch/arm/mach-pxa/xcep.c @@ -120,7 +120,8 @@ static struct resource smc91x_resources[] = { }; static struct smc91x_platdata xcep_smc91x_info = { - .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA, + .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | + SMC91X_NOWAIT | SMC91X_USE_DMA, }; static struct platform_device smc91x_device = { diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index baf1745..a0ead0a 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -93,7 +93,8 @@ static struct smsc911x_platform_config smsc911x_config = { }; static struct smc91x_platdata smc91x_platdata = { - .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT, + .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | + SMC91X_NOWAIT, }; static struct platform_device realview_eth_device = { diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c index 1525d7b..88149f8 100644 --- a/arch/arm/mach-sa1100/pleb.c +++ b/arch/arm/mach-sa1100/pleb.c @@ -45,7 +45,7 @@ static struct resource smc91x_resources[] = { }; static struct smc91x_platdata smc91x_platdata = { - .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT, + .flags = SMC91X_USE_16BIT | SMC91X_USE_8BIT | SMC91X_NOWAIT, }; static struct platform_device smc91x_device = { diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c index c6db52b..10c5777 100644 --- a/arch/blackfin/mach-bf561/boards/cm_bf561.c +++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c @@ -146,7 +146,8 @@ static struct platform_device hitachi_fb_device = { #include static struct smc91x_platdata smc91x_info = { - .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT, + .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | + SMC91X_NOWAIT, .leda = RPC_LED_100_10, .ledb = RPC_LED_TX_RX, }; diff --git a/arch/blackfin/mach-bf561/boards/ezkit.c b/arch/blackfin/mach-bf561/boards/ezkit.c index f35525b..57d1c43 100644 --- a/arch/blackfin/mach-bf561/boards/ezkit.c +++ b/arch/blackfin/mach-bf561/boards/ezkit.c @@ -134,7 +134,8 @@ static struct platform_device net2272_bfin_device = { #include static struct smc91x_platdata smc91x_info = { - .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT, + .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | + SMC91X_NOWAIT, .leda = RPC_LED_100_10, .ledb = RPC_LED_TX_RX, }; diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 726b80f..503a3b6 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2275,6 +2275,13 @@ static int smc_drv_probe(struct platform_device *pdev) if (pd) { memcpy(&lp->cfg, pd, sizeof(lp->cfg)); lp->io_shift = SMC91X_IO_SHIFT(lp->cfg.flags); + + if (!SMC_8BIT(lp) && !SMC_16BIT(lp)) { + dev_err(&pdev->dev, + "at least one of 8-bit or 16-bit access support is required.\n"); + ret = -ENXIO; + goto out_free_netdev; + } } #if IS_BUILTIN(CONFIG_OF) diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index 1a55c79..e17671c 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -37,6 +37,27 @@ #include /* + * Any 16-bit access is performed with two 8-bit accesses if the hardware + * can't do it directly. Most registers are 16-bit so those are mandatory. + */ +#define SMC_outw_b(x, a, r) \ + do { \ + unsigned int __val16 = (x); \ + unsigned int __reg = (r); \ + SMC_outb(__val16, a, __reg); \ + SMC_outb(__val16 >> 8, a, __reg + (1 << SMC_IO_SHIFT)); \ + } while (0) + +#define SMC_inw_b(a, r) \ + ({ \ + unsigned int __val16; \ + unsigned int __reg = r; \ + __val16 = SMC_inb(a, __reg); \ + __val16 |= SMC_inb(a, __reg + (1 << SMC_IO_SHIFT)) << 8; \ + __val16; \ + }) + +/* * Define your architecture specific bus configuration parameters here. */ @@ -55,10 +76,30 @@ #define SMC_IO_SHIFT (lp->io_shift) #define SMC_inb(a, r) readb((a) + (r)) -#define SMC_inw(a, r) readw((a) + (r)) +#define SMC_inw(a, r) \ + ({ \ + unsigned int __smc_r = r; \ + SMC_16BIT(lp) ? readw((a) + __smc_r) : \ + SMC_8BIT(lp) ? SMC_inw_b(a, __smc_r) : \ + ({ BUG(); 0; }); \ + }) + #define SMC_inl(a, r) readl((a) + (r)) #define SMC_outb(v, a, r) writeb(v, (a) + (r)) +#define SMC_outw(v, a, r) \ + do { \ + unsigned int __v = v, __smc_r = r; \ + if (SMC_16BIT(lp)) \ + __SMC_outw(__v, a, __smc_r); \ + else if (SMC_8BIT(lp)) \ + SMC_outw_b(__v, a, __smc_r); \ + else \ + BUG(); \ + } while (0) + #define SMC_outl(v, a, r) writel(v, (a) + (r)) +#define SMC_insb(a, r, p, l) readsb((a) + (r), p, l) +#define SMC_outsb(a, r, p, l) writesb((a) + (r), p, l) #define SMC_insw(a, r, p, l) readsw((a) + (r), p, l) #define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) #define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) @@ -66,7 +107,7 @@ #define SMC_IRQ_FLAGS (-1) /* from resource */ /* We actually can't write halfwords properly if not word aligned */ -static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg) +static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg) { if ((machine_is_mainstone() || machine_is_stargate2() || machine_is_pxa_idp()) && reg & 2) { @@ -416,24 +457,8 @@ smc_pxa_dma_insw(void __iomem *ioaddr, struct smc_local *lp, int reg, int dma, #if ! SMC_CAN_USE_16BIT -/* - * Any 16-bit access is performed with two 8-bit accesses if the hardware - * can't do it directly. Most registers are 16-bit so those are mandatory. - */ -#define SMC_outw(x, ioaddr, reg) \ - do { \ - unsigned int __val16 = (x); \ - SMC_outb( __val16, ioaddr, reg ); \ - SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\ - } while (0) -#define SMC_inw(ioaddr, reg) \ - ({ \ - unsigned int __val16; \ - __val16 = SMC_inb( ioaddr, reg ); \ - __val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \ - __val16; \ - }) - +#define SMC_outw(x, ioaddr, reg) SMC_outw_b(x, ioaddr, reg) +#define SMC_inw(ioaddr, reg) SMC_inw_b(ioaddr, reg) #define SMC_insw(a, r, p, l) BUG() #define SMC_outsw(a, r, p, l) BUG() diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h index 76199b7..e302c44 100644 --- a/include/linux/smc91x.h +++ b/include/linux/smc91x.h @@ -1,6 +1,16 @@ #ifndef __SMC91X_H__ #define __SMC91X_H__ +/* + * These bits define which access sizes a platform can support, rather + * than the maximal access size. So, if your platform can do 16-bit + * and 32-bit accesses to the SMC91x device, but not 8-bit, set both + * SMC91X_USE_16BIT and SMC91X_USE_32BIT. + * + * The SMC91x driver requires at least one of SMC91X_USE_8BIT or + * SMC91X_USE_16BIT to be supported - just setting SMC91X_USE_32BIT is + * an invalid configuration. + */ #define SMC91X_USE_8BIT (1 << 0) #define SMC91X_USE_16BIT (1 << 1) #define SMC91X_USE_32BIT (1 << 2) -- cgit v0.10.2 From b99b43bb4bdf1d361f7487cf03d803082bbf9101 Mon Sep 17 00:00:00 2001 From: Owen Lin Date: Fri, 26 Aug 2016 13:49:09 +0800 Subject: Add Killer E2500 device ID in alx driver. Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 6453148..4eb17da 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1545,6 +1545,8 @@ static const struct pci_device_id alx_pci_tbl[] = { .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2400), .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, + { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2500), + .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8162), .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8171) }, diff --git a/drivers/net/ethernet/atheros/alx/reg.h b/drivers/net/ethernet/atheros/alx/reg.h index 0959e68..1fc2d85 100644 --- a/drivers/net/ethernet/atheros/alx/reg.h +++ b/drivers/net/ethernet/atheros/alx/reg.h @@ -38,6 +38,7 @@ #define ALX_DEV_ID_AR8161 0x1091 #define ALX_DEV_ID_E2200 0xe091 #define ALX_DEV_ID_E2400 0xe0a1 +#define ALX_DEV_ID_E2500 0xe0b1 #define ALX_DEV_ID_AR8162 0x1090 #define ALX_DEV_ID_AR8171 0x10A1 #define ALX_DEV_ID_AR8172 0x10A0 -- cgit v0.10.2 From f48d1496b8537d75776478c6942dd87f34d7f270 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 15 Aug 2016 18:25:17 -0400 Subject: platform/olpc: Make ec explicitly non-modular The Kconfig entry controlling compilation of this code is: arch/x86/Kconfig:config OLPC arch/x86/Kconfig: bool "One Laptop Per Child support" ...meaning that it currently is not being built as a module by anyone. Lets remove the couple traces of modular infrastructure use, so that when reading the driver there is no doubt it is builtin-only. We delete the MODULE_LICENSE tag etc. since all that information was (or is now) contained at the top of the file in the comments. Cc: platform-driver-x86@vger.kernel.org Signed-off-by: Paul Gortmaker Acked-by: Andres Salomon Signed-off-by: Darren Hart diff --git a/drivers/platform/olpc/olpc-ec.c b/drivers/platform/olpc/olpc-ec.c index f99b183..374a802 100644 --- a/drivers/platform/olpc/olpc-ec.c +++ b/drivers/platform/olpc/olpc-ec.c @@ -1,6 +1,8 @@ /* * Generic driver for the OLPC Embedded Controller. * + * Author: Andres Salomon + * * Copyright (C) 2011-2012 One Laptop per Child Foundation. * * Licensed under the GPL v2 or later. @@ -12,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -326,8 +328,4 @@ static int __init olpc_ec_init_module(void) { return platform_driver_register(&olpc_ec_plat_driver); } - arch_initcall(olpc_ec_init_module); - -MODULE_AUTHOR("Andres Salomon "); -MODULE_LICENSE("GPL"); -- cgit v0.10.2 From da43bf0c21e57fff0221da5de0a9a388ec0d27cd Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 15 Aug 2016 18:24:59 -0400 Subject: intel_pmic_gpio: Make explicitly non-modular The Kconfig entry controlling compilation of this code is: drivers/platform/x86/Kconfig:config GPIO_INTEL_PMIC drivers/platform/x86/Kconfig: bool "Intel PMIC GPIO support" ...meaning that it currently is not being built as a module by anyone. Lets remove the couple traces of modular infrastructure use, so that when reading the driver there is no doubt it is builtin-only. We delete the MODULE_LICENSE tag etc. since all that information was (or is now) contained at the top of the file in the comments. We don't replace module.h with init.h since the file already has that. Cc: Alek Du Cc: platform-driver-x86@vger.kernel.org Signed-off-by: Paul Gortmaker Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c index 63b371d..91ae585 100644 --- a/drivers/platform/x86/intel_pmic_gpio.c +++ b/drivers/platform/x86/intel_pmic_gpio.c @@ -1,6 +1,8 @@ /* Moorestown PMIC GPIO (access through IPC) driver * Copyright (c) 2008 - 2009, Intel Corporation. * + * Author: Alek Du + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -21,7 +23,6 @@ #define pr_fmt(fmt) "%s: " fmt, __func__ -#include #include #include #include @@ -322,9 +323,4 @@ static int __init platform_pmic_gpio_init(void) { return platform_driver_register(&platform_pmic_gpio_driver); } - subsys_initcall(platform_pmic_gpio_init); - -MODULE_AUTHOR("Alek Du "); -MODULE_DESCRIPTION("Intel Moorestown PMIC GPIO driver"); -MODULE_LICENSE("GPL v2"); -- cgit v0.10.2 From 98744b408c757901df57fa50cbd5826245dc3a1f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 29 Aug 2016 08:44:03 +0200 Subject: clocksource/drivers/atmel-pit: Fix compilation error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix introduced a check against the ret variable which is not defined, hence producing a compilation error: linux/drivers/clocksource/timer-atmel-pit.c: In function ‘at91sam926x_pit_dt_init’: linux/drivers/clocksource/timer-atmel-pit.c:264:2: error: ‘ret’ undeclared (first use in this function) ret = clk_prepare_enable(data->mck); ^ linux/drivers/clocksource/timer-atmel-pit.c:264:2: note: each undeclared identifier is reported only once for each function it appears in Add the missing the variable 'ret'. Fixes: 504f34c9e45c "clocksource/drivers/atmel-pit: Convert init function to return error" Signed-off-by: Daniel Lezcano Cc: alexandre.belloni@free-electrons.com Cc: motobud@gmail.com Cc: realbright@lgcns.com Link: http://lkml.kernel.org/r/1472453043-24287-1-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Thomas Gleixner diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c index 3494bc5..7f0f5b2 100644 --- a/drivers/clocksource/timer-atmel-pit.c +++ b/drivers/clocksource/timer-atmel-pit.c @@ -240,6 +240,7 @@ static int __init at91sam926x_pit_common_init(struct pit_data *data) static int __init at91sam926x_pit_dt_init(struct device_node *node) { struct pit_data *data; + int ret; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) -- cgit v0.10.2 From 3c3292634fc2de1ab97b6aa3222fee647f737adb Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 29 Aug 2016 13:18:23 +0200 Subject: hwmon: (it87) Add missing sysfs attribute group terminator Attribute array it87_attributes_in lacks its NULL terminator, causing random behavior when operating on the attribute group. Fixes: 52929715634a ("hwmon: (it87) Use is_visible for voltage sensors") Signed-off-by: Jean Delvare Cc: Martin Blumenstingl Cc: Guenter Roeck Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index d0203a1..4667012 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -2015,6 +2015,7 @@ static struct attribute *it87_attributes_in[] = { &sensor_dev_attr_in10_input.dev_attr.attr, /* 41 */ &sensor_dev_attr_in11_input.dev_attr.attr, /* 41 */ &sensor_dev_attr_in12_input.dev_attr.attr, /* 41 */ + NULL }; static const struct attribute_group it87_group_in = { -- cgit v0.10.2 From c6c1f9bc798bee7cfc2e172cd2c9b48187d801a7 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 15 Aug 2016 16:41:13 +0800 Subject: drm/imx: Add active plane reconfiguration support We don't support configuring active plane on-the-fly for imx-drm. The relevant CRTC should be disabled before the plane configuration. Of course, the plane itself should be disabled as well. This patch adds active plane reconfiguration support by forcing CRTC mode change and disabling-enabling plane in plane's ->atomic_update callback. Suggested-by: Daniel Vetter Cc: Philipp Zabel Cc: David Airlie Cc: Russell King Cc: Daniel Vetter Cc: Peter Senna Tschudin Signed-off-by: Liu Ying Tested-by: Peter Senna Tschudin Tested-by: Lucas Stach Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index 9f7dafc..7bf90e9 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -171,10 +171,34 @@ static void imx_drm_output_poll_changed(struct drm_device *drm) drm_fbdev_cma_hotplug_event(imxdrm->fbhelper); } +static int imx_drm_atomic_check(struct drm_device *dev, + struct drm_atomic_state *state) +{ + int ret; + + ret = drm_atomic_helper_check_modeset(dev, state); + if (ret) + return ret; + + ret = drm_atomic_helper_check_planes(dev, state); + if (ret) + return ret; + + /* + * Check modeset again in case crtc_state->mode_changed is + * updated in plane's ->atomic_check callback. + */ + ret = drm_atomic_helper_check_modeset(dev, state); + if (ret) + return ret; + + return ret; +} + static const struct drm_mode_config_funcs imx_drm_mode_config_funcs = { .fb_create = drm_fb_cma_create, .output_poll_changed = imx_drm_output_poll_changed, - .atomic_check = drm_atomic_helper_check, + .atomic_check = imx_drm_atomic_check, .atomic_commit = drm_atomic_helper_commit, }; diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index 4ad67d0..29423e75 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -319,13 +319,14 @@ static int ipu_plane_atomic_check(struct drm_plane *plane, return -EINVAL; /* - * since we cannot touch active IDMAC channels, we do not support - * resizing the enabled plane or changing its format + * We support resizing active plane or changing its format by + * forcing CRTC mode change and disabling-enabling plane in plane's + * ->atomic_update callback. */ if (old_fb && (state->src_w != old_state->src_w || state->src_h != old_state->src_h || fb->pixel_format != old_fb->pixel_format)) - return -EINVAL; + crtc_state->mode_changed = true; eba = drm_plane_state_to_eba(state); @@ -336,7 +337,7 @@ static int ipu_plane_atomic_check(struct drm_plane *plane, return -EINVAL; if (old_fb && fb->pitches[0] != old_fb->pitches[0]) - return -EINVAL; + crtc_state->mode_changed = true; switch (fb->pixel_format) { case DRM_FORMAT_YUV420: @@ -372,7 +373,7 @@ static int ipu_plane_atomic_check(struct drm_plane *plane, return -EINVAL; if (old_fb && old_fb->pitches[1] != fb->pitches[1]) - return -EINVAL; + crtc_state->mode_changed = true; } return 0; @@ -392,8 +393,14 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, enum ipu_color_space ics; if (old_state->fb) { - ipu_plane_atomic_set_base(ipu_plane, old_state); - return; + struct drm_crtc_state *crtc_state = state->crtc->state; + + if (!crtc_state->mode_changed) { + ipu_plane_atomic_set_base(ipu_plane, old_state); + return; + } + + ipu_disable_plane(plane); } switch (ipu_plane->dp_flow) { -- cgit v0.10.2 From 3dc147359e3dcdf0648f1e2c11f62cfae3160df0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Aug 2016 15:12:54 -0400 Subject: pNFS/flexfiles: Fix an Oopsable condition when connection to the DS fails If the attempt to connect to a DS fails inside ff_layout_pg_init_read or ff_layout_pg_init_write, then we currently end up clearing the layout segment carried by the struct nfs_pageio_descriptor, causing an Oops when we later call into ff_layout_read_pagelist/ff_layout_write_pagelist. The fix is to ensure we return the layout and then retry. Fixes: 446ca2195303 ("pNFS/flexfiles: When initing reads or writes, we...") Cc: stable@vger.kernel.org # v4.7+ Signed-off-by: Trond Myklebust diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index ee1c94c..51b5136 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -806,11 +806,14 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, { struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); struct nfs4_pnfs_ds *ds; + bool fail_return = false; int idx; /* mirrors are sorted by efficiency */ for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { - ds = nfs4_ff_layout_prepare_ds(lseg, idx, false); + if (idx+1 == fls->mirror_array_cnt) + fail_return = true; + ds = nfs4_ff_layout_prepare_ds(lseg, idx, fail_return); if (ds) { *best_idx = idx; return ds; @@ -859,6 +862,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs4_pnfs_ds *ds; int ds_idx; +retry: /* Use full layout for now */ if (!pgio->pg_lseg) ff_layout_pg_get_read(pgio, req, false); @@ -871,10 +875,13 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx); if (!ds) { - if (ff_layout_no_fallback_to_mds(pgio->pg_lseg)) - goto out_pnfs; - else + if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) goto out_mds; + pnfs_put_lseg(pgio->pg_lseg); + pgio->pg_lseg = NULL; + /* Sleep for 1 second before retrying */ + ssleep(1); + goto retry; } mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); @@ -890,12 +897,6 @@ out_mds: pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; nfs_pageio_reset_read_mds(pgio); - return; - -out_pnfs: - pnfs_set_lo_fail(pgio->pg_lseg); - pnfs_put_lseg(pgio->pg_lseg); - pgio->pg_lseg = NULL; } static void @@ -909,6 +910,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, int i; int status; +retry: if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, @@ -940,10 +942,13 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, for (i = 0; i < pgio->pg_mirror_count; i++) { ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true); if (!ds) { - if (ff_layout_no_fallback_to_mds(pgio->pg_lseg)) - goto out_pnfs; - else + if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) goto out_mds; + pnfs_put_lseg(pgio->pg_lseg); + pgio->pg_lseg = NULL; + /* Sleep for 1 second before retrying */ + ssleep(1); + goto retry; } pgm = &pgio->pg_mirrors[i]; mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); @@ -956,12 +961,6 @@ out_mds: pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; nfs_pageio_reset_write_mds(pgio); - return; - -out_pnfs: - pnfs_set_lo_fail(pgio->pg_lseg); - pnfs_put_lseg(pgio->pg_lseg); - pgio->pg_lseg = NULL; } static unsigned int diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 970efba..f7a3f6b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -379,7 +379,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, devid = &mirror->mirror_ds->id_node; if (ff_layout_test_devid_unavailable(devid)) - goto out; + goto out_fail; ds = mirror->mirror_ds->ds; /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ @@ -405,15 +405,16 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, mirror->mirror_ds->ds_versions[0].rsize = max_payload; if (mirror->mirror_ds->ds_versions[0].wsize > max_payload) mirror->mirror_ds->ds_versions[0].wsize = max_payload; - } else { - ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), - mirror, lseg->pls_range.offset, - lseg->pls_range.length, NFS4ERR_NXIO, - OP_ILLEGAL, GFP_NOIO); - if (fail_return || !ff_layout_has_available_ds(lseg)) - pnfs_error_mark_layout_for_return(ino, lseg); - ds = NULL; + goto out; } + ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), + mirror, lseg->pls_range.offset, + lseg->pls_range.length, NFS4ERR_NXIO, + OP_ILLEGAL, GFP_NOIO); +out_fail: + if (fail_return || !ff_layout_has_available_ds(lseg)) + pnfs_error_mark_layout_for_return(ino, lseg); + ds = NULL; out: return ds; } -- cgit v0.10.2 From ea78d80866ce375defb2fdd1c8a3aafec95e0f85 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Aug 2016 13:51:39 +1000 Subject: xfs: track log done items directly in the deferred pending work item Christoph reports slab corruption when a deferred refcount update aborts during _defer_finish(). The cause of this was broken log item state tracking in xfs_defer_pending -- upon an abort, _defer_trans_abort() will call abort_intent on all intent items, including the ones that have already had a done item attached. This is incorrect because each intent item has 2 refcount: the first is released when the intent item is committed to the log; and the second is released when the _done_ item is committed to the log, or by the intent creator if there is no done item. In other words, once we log the done item, responsibility for releasing the intent item's second refcount is transferred to the done item and /must not/ be performed by anything else. The dfp_committed flag should have been tracking whether or not we had a done item so that _defer_trans_abort could decide if it needs to abort the intent item, but due to a thinko this was not the case. Rip it out and track the done item directly so that we do the right thing w.r.t. intent item freeing. Signed-off-by: Darrick J. Wong Reported-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 054a203..c221d0e 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -194,7 +194,7 @@ xfs_defer_trans_abort( /* Abort intent items. */ list_for_each_entry(dfp, &dop->dop_pending, dfp_list) { trace_xfs_defer_pending_abort(tp->t_mountp, dfp); - if (dfp->dfp_committed) + if (!dfp->dfp_done) dfp->dfp_type->abort_intent(dfp->dfp_intent); } @@ -290,7 +290,6 @@ xfs_defer_finish( struct xfs_defer_pending *dfp; struct list_head *li; struct list_head *n; - void *done_item = NULL; void *state; int error = 0; void (*cleanup_fn)(struct xfs_trans *, void *, int); @@ -309,19 +308,11 @@ xfs_defer_finish( if (error) goto out; - /* Mark all pending intents as committed. */ - list_for_each_entry_reverse(dfp, &dop->dop_pending, dfp_list) { - if (dfp->dfp_committed) - break; - trace_xfs_defer_pending_commit((*tp)->t_mountp, dfp); - dfp->dfp_committed = true; - } - /* Log an intent-done item for the first pending item. */ dfp = list_first_entry(&dop->dop_pending, struct xfs_defer_pending, dfp_list); trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp); - done_item = dfp->dfp_type->create_done(*tp, dfp->dfp_intent, + dfp->dfp_done = dfp->dfp_type->create_done(*tp, dfp->dfp_intent, dfp->dfp_count); cleanup_fn = dfp->dfp_type->finish_cleanup; @@ -331,7 +322,7 @@ xfs_defer_finish( list_del(li); dfp->dfp_count--; error = dfp->dfp_type->finish_item(*tp, dop, li, - done_item, &state); + dfp->dfp_done, &state); if (error) { /* * Clean up after ourselves and jump out. @@ -428,8 +419,8 @@ xfs_defer_add( dfp = kmem_alloc(sizeof(struct xfs_defer_pending), KM_SLEEP | KM_NOFS); dfp->dfp_type = defer_op_types[type]; - dfp->dfp_committed = false; dfp->dfp_intent = NULL; + dfp->dfp_done = NULL; dfp->dfp_count = 0; INIT_LIST_HEAD(&dfp->dfp_work); list_add_tail(&dfp->dfp_list, &dop->dop_intake); diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index cc3981c..e96533d 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -30,8 +30,8 @@ struct xfs_defer_op_type; struct xfs_defer_pending { const struct xfs_defer_op_type *dfp_type; /* function pointers */ struct list_head dfp_list; /* pending items */ - bool dfp_committed; /* committed trans? */ void *dfp_intent; /* log intent item */ + void *dfp_done; /* log done item */ struct list_head dfp_work; /* work items */ unsigned int dfp_count; /* # extent items */ }; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 7e88bec..d303a66 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2295,7 +2295,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class, __entry->dev = mp ? mp->m_super->s_dev : 0; __entry->type = dfp->dfp_type->type; __entry->intent = dfp->dfp_intent; - __entry->committed = dfp->dfp_committed; + __entry->committed = dfp->dfp_done != NULL; __entry->nr = dfp->dfp_count; ), TP_printk("dev %d:%d optype %d intent %p committed %d nr %d\n", -- cgit v0.10.2 From a474478642d57641ea06645104a15acc0420f01a Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 29 Aug 2016 17:51:24 +0200 Subject: drm/imx: fix crtc vblank state regression The atomic conversion lost the notification to let the DRM core know about the current state of the CRTC vblank interrupts. This regressed the ability of the core to reject page flip attempts on currently disabled CRTCs. Add back the notifications. Signed-off-by: Lucas Stach Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index 08e188b..462056e 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -76,6 +76,8 @@ static void ipu_crtc_disable(struct drm_crtc *crtc) crtc->state->event = NULL; } spin_unlock_irq(&crtc->dev->event_lock); + + drm_crtc_vblank_off(crtc); } static void imx_drm_crtc_reset(struct drm_crtc *crtc) @@ -175,6 +177,8 @@ static int ipu_crtc_atomic_check(struct drm_crtc *crtc, static void ipu_crtc_atomic_begin(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { + drm_crtc_vblank_on(crtc); + spin_lock_irq(&crtc->dev->event_lock); if (crtc->state->event) { WARN_ON(drm_crtc_vblank_get(crtc)); -- cgit v0.10.2 From 2c5575401e34de3d2fc90af1c95bc73435784093 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 26 Aug 2016 16:28:39 -0500 Subject: usb: musb: Fix locking errors for host only mode If we have USB gadgets disabled and USB_MUSB_HOST set, we get errors "possible irq lock inverssion dependency detected" errors during boot. Let's fix the issue by adding start_musb flag and start the controller after we're out of the spinlock protected section. Reported-by: Ladislav Michl Tested-by: Ladislav Michl Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c index 192248f..fe08e77 100644 --- a/drivers/usb/musb/musb_virthub.c +++ b/drivers/usb/musb/musb_virthub.c @@ -290,6 +290,7 @@ int musb_hub_control( u32 temp; int retval = 0; unsigned long flags; + bool start_musb = false; spin_lock_irqsave(&musb->lock, flags); @@ -390,7 +391,7 @@ int musb_hub_control( * logic relating to VBUS power-up. */ if (!hcd->self.is_b_host && musb_has_gadget(musb)) - musb_start(musb); + start_musb = true; break; case USB_PORT_FEAT_RESET: musb_port_reset(musb, true); @@ -451,5 +452,9 @@ error: retval = -EPIPE; } spin_unlock_irqrestore(&musb->lock, flags); + + if (start_musb) + musb_start(musb); + return retval; } -- cgit v0.10.2 From 8c57cac1457f3125a5d13dc03635c0708c61bff0 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 20 Jul 2016 10:24:02 +0300 Subject: mei: me: disable driver on SPT SPS firmware Sunrise Point PCH with SPS Firmware doesn't expose working MEI interface, we need to quirk it out. The SPS Firmware is identifiable only on the first PCI function of the device. Cc: #4.6+ Tested-by: Sujith Pandel Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index e2fb44c..dc3a854 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -1263,8 +1263,14 @@ static bool mei_me_fw_type_nm(struct pci_dev *pdev) static bool mei_me_fw_type_sps(struct pci_dev *pdev) { u32 reg; - /* Read ME FW Status check for SPS Firmware */ - pci_read_config_dword(pdev, PCI_CFG_HFS_1, ®); + unsigned int devfn; + + /* + * Read ME FW Status register to check for SPS Firmware + * The SPS FW is only signaled in pci function 0 + */ + devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); + pci_bus_read_config_dword(pdev->bus, devfn, PCI_CFG_HFS_1, ®); trace_mei_pci_cfg_read(&pdev->dev, "PCI_CFG_HFS_1", PCI_CFG_HFS_1, reg); /* if bits [19:16] = 15, running SPS Firmware */ return (reg & 0xf0000) == 0xf0000; diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 64e64da..71cea9b 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -85,8 +85,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_SPT, mei_me_pch8_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, mei_me_pch8_cfg)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_cfg)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_sps_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_sps_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, mei_me_pch8_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, mei_me_pch8_cfg)}, -- cgit v0.10.2 From 52442f9b11b7e5d4a38d99143011831fd171f8d9 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 30 Aug 2016 09:20:32 -0400 Subject: NFS4: Avoid migration loops If a server returns itself as a location while migrating, the client may end up getting stuck attempting to migrate twice to the same server. Catch this by checking if the nfs_client found is the same as the existing client. For the other two callers to nfs4_set_client, the nfs_client will always be ERR_PTR(-EINVAL). Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 8d7d08d..cd3b7cf 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -817,6 +817,11 @@ static int nfs4_set_client(struct nfs_server *server, goto error; } + if (server->nfs_client == clp) { + error = -ELOOP; + goto error; + } + /* * Query for the lease time on clientid setup or renewal * -- cgit v0.10.2 From 98b0f80c2396224bbbed81792b526e6c72ba9efa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Aug 2016 11:15:36 -0400 Subject: NFSv4.x: Fix a refcount leak in nfs_callback_up_net On error, the callers expect us to return without bumping nn->cb_users[]. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v3.7+ diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index a7f2e6e..52a2831 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -275,6 +275,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, err_socks: svc_rpcb_cleanup(serv, net); err_bind: + nn->cb_users[minorversion]--; dprintk("NFS: Couldn't create callback socket: err = %d; " "net = %p\n", ret, net); return ret; -- cgit v0.10.2 From 9ebae9e4bcd7dff22536af8a969d8f66e6f23900 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 30 Aug 2016 16:47:02 +0100 Subject: pata_ninja32: Avoid corrupting status flags Ninja32 needs to set some flags to indicate it does 32bit IO. However it currently assigns this which loses the initializing flag and causes a warning spew. Fix it to use a logical or as is intended. Signed-off-by: Alan Cox Tested-by: Ellmar Stelnberger Signed-off-by: Tejun Heo diff --git a/drivers/ata/pata_ninja32.c b/drivers/ata/pata_ninja32.c index 633aa29..44f97ad 100644 --- a/drivers/ata/pata_ninja32.c +++ b/drivers/ata/pata_ninja32.c @@ -144,7 +144,7 @@ static int ninja32_init_one(struct pci_dev *dev, const struct pci_device_id *id) ap->ioaddr.altstatus_addr = base + 0x1E; ap->ioaddr.bmdma_addr = base; ata_sff_std_ports(&ap->ioaddr); - ap->pflags = ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32CHANGE; + ap->pflags |= ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32CHANGE; ninja32_program(base); /* FIXME: Should we disable them at remove ? */ -- cgit v0.10.2 From 0d025d271e55f3de21f0aaaf54b42d20404d2b23 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 30 Aug 2016 08:04:16 -0500 Subject: mm/usercopy: get rid of CONFIG_DEBUG_STRICT_USER_COPY_CHECKS There are three usercopy warnings which are currently being silenced for gcc 4.6 and newer: 1) "copy_from_user() buffer size is too small" compile warning/error This is a static warning which happens when object size and copy size are both const, and copy size > object size. I didn't see any false positives for this one. So the function warning attribute seems to be working fine here. Note this scenario is always a bug and so I think it should be changed to *always* be an error, regardless of CONFIG_DEBUG_STRICT_USER_COPY_CHECKS. 2) "copy_from_user() buffer size is not provably correct" compile warning This is another static warning which happens when I enable __compiletime_object_size() for new compilers (and CONFIG_DEBUG_STRICT_USER_COPY_CHECKS). It happens when object size is const, but copy size is *not*. In this case there's no way to compare the two at build time, so it gives the warning. (Note the warning is a byproduct of the fact that gcc has no way of knowing whether the overflow function will be called, so the call isn't dead code and the warning attribute is activated.) So this warning seems to only indicate "this is an unusual pattern, maybe you should check it out" rather than "this is a bug". I get 102(!) of these warnings with allyesconfig and the __compiletime_object_size() gcc check removed. I don't know if there are any real bugs hiding in there, but from looking at a small sample, I didn't see any. According to Kees, it does sometimes find real bugs. But the false positive rate seems high. 3) "Buffer overflow detected" runtime warning This is a runtime warning where object size is const, and copy size > object size. All three warnings (both static and runtime) were completely disabled for gcc 4.6 with the following commit: 2fb0815c9ee6 ("gcc4: disable __compiletime_object_size for GCC 4.6+") That commit mistakenly assumed that the false positives were caused by a gcc bug in __compiletime_object_size(). But in fact, __compiletime_object_size() seems to be working fine. The false positives were instead triggered by #2 above. (Though I don't have an explanation for why the warnings supposedly only started showing up in gcc 4.6.) So remove warning #2 to get rid of all the false positives, and re-enable warnings #1 and #3 by reverting the above commit. Furthermore, since #1 is a real bug which is detected at compile time, upgrade it to always be an error. Having done all that, CONFIG_DEBUG_STRICT_USER_COPY_CHECKS is no longer needed. Signed-off-by: Josh Poimboeuf Cc: Kees Cook Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H . Peter Anvin" Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Brian Gerst Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Byungchul Park Cc: Nilay Vaish Signed-off-by: Linus Torvalds diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index cd87781..af12c2d 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -1,6 +1,5 @@ config PARISC def_bool y - select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_MIGHT_HAVE_PC_PARPORT select HAVE_IDE select HAVE_OPROFILE diff --git a/arch/parisc/configs/c8000_defconfig b/arch/parisc/configs/c8000_defconfig index 1a8f6f95..f6a4c01 100644 --- a/arch/parisc/configs/c8000_defconfig +++ b/arch/parisc/configs/c8000_defconfig @@ -245,7 +245,6 @@ CONFIG_DEBUG_RT_MUTEXES=y CONFIG_PROVE_RCU_DELAY=y CONFIG_DEBUG_BLOCK_EXT_DEVT=y CONFIG_LATENCYTOP=y -CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_KEYS=y # CONFIG_CRYPTO_HW is not set CONFIG_FONTS=y diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index 7e07926..c564e6e 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -291,7 +291,6 @@ CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y # CONFIG_SCHED_DEBUG is not set CONFIG_TIMER_STATS=y -CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 0f59fd9..e915048 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -208,13 +208,13 @@ unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned lo #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user -extern void copy_from_user_overflow(void) -#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS - __compiletime_error("copy_from_user() buffer size is not provably correct") -#else - __compiletime_warning("copy_from_user() buffer size is not provably correct") -#endif -; +extern void __compiletime_error("usercopy buffer size is too small") +__bad_copy_user(void); + +static inline void copy_user_overflow(int size, unsigned long count) +{ + WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); +} static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, @@ -223,10 +223,12 @@ static inline unsigned long __must_check copy_from_user(void *to, int sz = __compiletime_object_size(to); int ret = -EFAULT; - if (likely(sz == -1 || !__builtin_constant_p(n) || sz >= n)) + if (likely(sz == -1 || sz >= n)) ret = __copy_from_user(to, from, n); - else - copy_from_user_overflow(); + else if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); + else + __bad_copy_user(); return ret; } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e751fe2..c109f07 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -68,7 +68,6 @@ config DEBUG_RODATA config S390 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE - select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 26e0c7f..412b1bd 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -602,7 +602,6 @@ CONFIG_FAIL_FUTEX=y CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LATENCYTOP=y -CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_IRQSOFF_TRACER=y CONFIG_PREEMPT_TRACER=y CONFIG_SCHED_TRACER=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index 24879da..bec279e 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -552,7 +552,6 @@ CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_CPU_NOTIFIER_ERROR_INJECT=m CONFIG_PM_NOTIFIER_ERROR_INJECT=m CONFIG_LATENCYTOP=y -CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_BLK_DEV_IO_TRACE=y # CONFIG_KPROBE_EVENT is not set CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index a5c1e5f..1751446 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -549,7 +549,6 @@ CONFIG_TIMER_STATS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y -CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 73610f2..2d40ef0 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -172,7 +172,6 @@ CONFIG_DEBUG_NOTIFIERS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_RCU_TRACE=y CONFIG_LATENCYTOP=y -CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 9b49cf1..95aefdb 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -311,6 +311,14 @@ int __get_user_bad(void) __attribute__((noreturn)); #define __put_user_unaligned __put_user #define __get_user_unaligned __get_user +extern void __compiletime_error("usercopy buffer size is too small") +__bad_copy_user(void); + +static inline void copy_user_overflow(int size, unsigned long count) +{ + WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); +} + /** * copy_to_user: - Copy a block of data into user space. * @to: Destination address, in user space. @@ -332,12 +340,6 @@ copy_to_user(void __user *to, const void *from, unsigned long n) return __copy_to_user(to, from, n); } -void copy_from_user_overflow(void) -#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS -__compiletime_warning("copy_from_user() buffer size is not provably correct") -#endif -; - /** * copy_from_user: - Copy a block of data from user space. * @to: Destination address, in kernel space. @@ -362,7 +364,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) might_fault(); if (unlikely(sz != -1 && sz < n)) { - copy_from_user_overflow(); + if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); + else + __bad_copy_user(); return n; } return __copy_from_user(to, from, n); diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 4820a02..78da75b 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -4,7 +4,6 @@ config TILE def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE - select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_WANT_FRAME_POINTERS diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index 0a9c4265..a77369e 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h @@ -416,14 +416,13 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) return n; } -#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS -/* - * There are still unprovable places in the generic code as of 2.6.34, so this - * option is not really compatible with -Werror, which is more useful in - * general. - */ -extern void copy_from_user_overflow(void) - __compiletime_warning("copy_from_user() size is not provably correct"); +extern void __compiletime_error("usercopy buffer size is too small") +__bad_copy_user(void); + +static inline void copy_user_overflow(int size, unsigned long count) +{ + WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); +} static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, @@ -433,14 +432,13 @@ static inline unsigned long __must_check copy_from_user(void *to, if (likely(sz == -1 || sz >= n)) n = _copy_from_user(to, from, n); + else if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); else - copy_from_user_overflow(); + __bad_copy_user(); return n; } -#else -#define copy_from_user _copy_from_user -#endif #ifdef __tilegx__ /** diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c580d8c..2a1f0ce 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -24,7 +24,6 @@ config X86 select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE - select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a0ae610..c3f2911 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -697,43 +697,14 @@ unsigned long __must_check _copy_from_user(void *to, const void __user *from, unsigned long __must_check _copy_to_user(void __user *to, const void *from, unsigned n); -#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS -# define copy_user_diag __compiletime_error -#else -# define copy_user_diag __compiletime_warning -#endif - -extern void copy_user_diag("copy_from_user() buffer size is too small") -copy_from_user_overflow(void); -extern void copy_user_diag("copy_to_user() buffer size is too small") -copy_to_user_overflow(void) __asm__("copy_from_user_overflow"); - -#undef copy_user_diag - -#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS - -extern void -__compiletime_warning("copy_from_user() buffer size is not provably correct") -__copy_from_user_overflow(void) __asm__("copy_from_user_overflow"); -#define __copy_from_user_overflow(size, count) __copy_from_user_overflow() - -extern void -__compiletime_warning("copy_to_user() buffer size is not provably correct") -__copy_to_user_overflow(void) __asm__("copy_from_user_overflow"); -#define __copy_to_user_overflow(size, count) __copy_to_user_overflow() - -#else +extern void __compiletime_error("usercopy buffer size is too small") +__bad_copy_user(void); -static inline void -__copy_from_user_overflow(int size, unsigned long count) +static inline void copy_user_overflow(int size, unsigned long count) { WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); } -#define __copy_to_user_overflow __copy_from_user_overflow - -#endif - static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { @@ -743,31 +714,13 @@ copy_from_user(void *to, const void __user *from, unsigned long n) kasan_check_write(to, n); - /* - * While we would like to have the compiler do the checking for us - * even in the non-constant size case, any false positives there are - * a problem (especially when DEBUG_STRICT_USER_COPY_CHECKS, but even - * without - the [hopefully] dangerous looking nature of the warning - * would make people go look at the respecitive call sites over and - * over again just to find that there's no problem). - * - * And there are cases where it's just not realistic for the compiler - * to prove the count to be in range. For example when multiple call - * sites of a helper function - perhaps in different source files - - * all doing proper range checking, yet the helper function not doing - * so again. - * - * Therefore limit the compile time checking to the constant size - * case, and do only runtime checking for non-constant sizes. - */ - if (likely(sz < 0 || sz >= n)) { check_object_size(to, n, false); n = _copy_from_user(to, from, n); - } else if (__builtin_constant_p(n)) - copy_from_user_overflow(); + } else if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); else - __copy_from_user_overflow(sz, n); + __bad_copy_user(); return n; } @@ -781,21 +734,17 @@ copy_to_user(void __user *to, const void *from, unsigned long n) might_fault(); - /* See the comment in copy_from_user() above. */ if (likely(sz < 0 || sz >= n)) { check_object_size(from, n, true); n = _copy_to_user(to, from, n); - } else if (__builtin_constant_p(n)) - copy_to_user_overflow(); + } else if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); else - __copy_to_user_overflow(sz, n); + __bad_copy_user(); return n; } -#undef __copy_from_user_overflow -#undef __copy_to_user_overflow - /* * We rely on the nested NMI work to allow atomic faults from the NMI path; the * nested NMI paths are careful to preserve CR2. diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 1bfa602..5dea1fb 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -72,6 +72,7 @@ struct exception_table_entry /* Returns 0 if exception not found and fixup otherwise. */ extern unsigned long search_exception_table(unsigned long); + /* * architectures with an MMU should override these two */ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 8dbc892..573c5a1 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -158,7 +158,7 @@ #define __compiler_offsetof(a, b) \ __builtin_offsetof(a, b) -#if GCC_VERSION >= 40100 && GCC_VERSION < 40600 +#if GCC_VERSION >= 40100 # define __compiletime_object_size(obj) __builtin_object_size(obj, 0) #endif diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2307d7c..2e2cca5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1686,24 +1686,6 @@ config LATENCYTOP Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations. -config ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS - bool - -config DEBUG_STRICT_USER_COPY_CHECKS - bool "Strict user copy size checks" - depends on ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS - depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING - help - Enabling this option turns a certain set of sanity checks for user - copy operations into compile time failures. - - The copy_from_user() etc checks are there to help test if there - are sufficient security checks on the length argument of - the copy operation, by having gcc prove that the argument is - within bounds. - - If unsure, say N. - source kernel/trace/Kconfig menu "Runtime Testing" diff --git a/lib/Makefile b/lib/Makefile index cfa68eb..5dc77a8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -24,7 +24,6 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o -obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-$(CONFIG_HAS_DMA) += dma-noop.o diff --git a/lib/usercopy.c b/lib/usercopy.c deleted file mode 100644 index 4f5b1dd..0000000 --- a/lib/usercopy.c +++ /dev/null @@ -1,9 +0,0 @@ -#include -#include -#include - -void copy_from_user_overflow(void) -{ - WARN(1, "Buffer overflow detected!\n"); -} -EXPORT_SYMBOL(copy_from_user_overflow); -- cgit v0.10.2 From a5d60783df61fbb67b7596b8a0f6b4b2e05251d5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 30 Aug 2016 16:11:53 -0400 Subject: dm log writes: move IO accounting earlier to fix error path Move log_one_block()'s atomic_inc(&lc->io_blocks) before bio_alloc() to fix a bug that the target hangs if bio_alloc() fails. The error path does put_io_block(lc), so atomic_inc(&lc->io_blocks) must occur before invoking the error path to avoid underflow of lc->io_blocks. Signed-off-by: Mikulas Patocka Reviewed-by: Josef Bacik Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 4ab6803..4cc78ae 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -259,12 +259,12 @@ static int log_one_block(struct log_writes_c *lc, goto out; sector++; + atomic_inc(&lc->io_blocks); bio = bio_alloc(GFP_KERNEL, block->vec_cnt); if (!bio) { DMERR("Couldn't alloc log bio"); goto error; } - atomic_inc(&lc->io_blocks); bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; bio->bi_bdev = lc->logdev->bdev; -- cgit v0.10.2 From 7efb367320f56fc4d549875b6f3a6940018ef2e5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 30 Aug 2016 16:20:55 -0400 Subject: dm log writes: fix bug with too large bios bio_alloc() can allocate a bio with at most BIO_MAX_PAGES (256) vector entries. However, the incoming bio may have more vector entries if it was allocated by other means. For example, bcache submits bios with more than BIO_MAX_PAGES entries. This results in bio_alloc() failure. To avoid the failure, change the code so that it allocates bio with at most BIO_MAX_PAGES entries. If the incoming bio has more entries, bio_add_page() will fail and a new bio will be allocated - the code that handles bio_add_page() failure already exists in the dm-log-writes target. Signed-off-by: Mikulas Patocka Reviewed-by: Josef Bacik Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # v4.1+ diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 4cc78ae..ba24f4f 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -260,7 +260,7 @@ static int log_one_block(struct log_writes_c *lc, sector++; atomic_inc(&lc->io_blocks); - bio = bio_alloc(GFP_KERNEL, block->vec_cnt); + bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES)); if (!bio) { DMERR("Couldn't alloc log bio"); goto error; @@ -282,7 +282,7 @@ static int log_one_block(struct log_writes_c *lc, if (ret != block->vecs[i].bv_len) { atomic_inc(&lc->io_blocks); submit_bio(bio); - bio = bio_alloc(GFP_KERNEL, block->vec_cnt - i); + bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt - i, BIO_MAX_PAGES)); if (!bio) { DMERR("Couldn't alloc log bio"); goto error; -- cgit v0.10.2 From 485a252a5559b45d7df04c819ec91177c62c270b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Aug 2016 16:28:09 -0700 Subject: seccomp: Fix tracer exit notifications during fatal signals This fixes a ptrace vs fatal pending signals bug as manifested in seccomp now that seccomp was reordered to happen after ptrace. The short version is that seccomp should not attempt to call do_exit() while fatal signals are pending under a tracer. The existing code was trying to be as defensively paranoid as possible, but it now ends up confusing ptrace. Instead, the syscall can just be skipped (which solves the original concern that the do_exit() was addressing) and normal signal handling, tracer notification, and process death can happen. Paraphrasing from the original bug report: If a tracee task is in a PTRACE_EVENT_SECCOMP trap, or has been resumed after such a trap but not yet been scheduled, and another task in the thread-group calls exit_group(), then the tracee task exits without the ptracer receiving a PTRACE_EVENT_EXIT notification. Test case here: https://gist.github.com/khuey/3c43ac247c72cef8c956ca73281c9be7 The bug happens because when __seccomp_filter() detects fatal_signal_pending(), it calls do_exit() without dequeuing the fatal signal. When do_exit() sends the PTRACE_EVENT_EXIT notification and that task is descheduled, __schedule() notices that there is a fatal signal pending and changes its state from TASK_TRACED to TASK_RUNNING. That prevents the ptracer's waitpid() from returning the ptrace event. A more detailed analysis is here: https://github.com/mozilla/rr/issues/1762#issuecomment-237396255. Reported-by: Robert O'Callahan Reported-by: Kyle Huey Tested-by: Kyle Huey Fixes: 93e35efb8de4 ("x86/ptrace: run seccomp after ptrace") Signed-off-by: Kees Cook Acked-by: Oleg Nesterov Acked-by: James Morris diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ef6c6c3..0db7c8a 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -605,12 +605,16 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, ptrace_event(PTRACE_EVENT_SECCOMP, data); /* * The delivery of a fatal signal during event - * notification may silently skip tracer notification. - * Terminating the task now avoids executing a system - * call that may not be intended. + * notification may silently skip tracer notification, + * which could leave us with a potentially unmodified + * syscall that the tracer would have liked to have + * changed. Since the process is about to die, we just + * force the syscall to be skipped and let the signal + * kill the process and correctly handle any tracer exit + * notifications. */ if (fatal_signal_pending(current)) - do_exit(SIGSYS); + goto skip; /* Check if the tracer forced the syscall to be skipped. */ this_syscall = syscall_get_nr(current, task_pt_regs(current)); if (this_syscall < 0) -- cgit v0.10.2 From 91e630d9ae6de6f740ef7c8176736eb55366833e Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Thu, 10 Mar 2016 01:22:19 +0200 Subject: dm log writes: fix check of kthread_run() return value The kthread_run() function returns either a valid task_struct or ERR_PTR() value, check for NULL is invalid. This change fixes potential for oops, e.g. in OOM situation. Signed-off-by: Vladimir Zapolskiy Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index ba24f4f..49e4d8d 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -459,9 +459,9 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ret = -EINVAL; lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write"); - if (!lc->log_kthread) { + if (IS_ERR(lc->log_kthread)) { + ret = PTR_ERR(lc->log_kthread); ti->error = "Couldn't alloc kthread"; dm_put_device(ti, lc->dev); dm_put_device(ti, lc->logdev); -- cgit v0.10.2 From 4e870e948fbabf62b78e8410f04c67703e7c816b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 30 Aug 2016 16:38:42 -0400 Subject: dm crypt: fix error with too large bios When dm-crypt processes writes, it allocates a new bio in crypt_alloc_buffer(). The bio is allocated from a bio set and it can have at most BIO_MAX_PAGES vector entries, however the incoming bio can be larger (e.g. if it was allocated by bcache). If the incoming bio is larger, bio_alloc_bioset() fails and an error is returned. To avoid the error, we test for a too large bio in the function crypt_map() and use dm_accept_partial_bio() to split the bio. dm_accept_partial_bio() trims the current bio to the desired size and asks DM core to send another bio with the rest of the data. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # v3.16+ diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index eedba67..d609566 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1924,6 +1924,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } + /* + * Check if bio is too large, split as needed. + */ + if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) && + bio_data_dir(bio) == WRITE) + dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT)); + io = dm_per_bio_data(bio, cc->per_bio_data_size); crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); io->ctx.req = (struct skcipher_request *)(io + 1); -- cgit v0.10.2 From 5d0be84ec0cacfc7a6d6ea548afdd07d481324cd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 30 Aug 2016 09:51:44 -0700 Subject: dm crypt: fix free of bad values after tfm allocation failure If crypt_alloc_tfms() had to allocate multiple tfms and it failed before the last allocation, then it would call crypt_free_tfms() and could free pointers from uninitialized memory -- due to the crypt_free_tfms() check for non-zero cc->tfms[i]. Fix by allocating zeroed memory. Signed-off-by: Eric Biggers Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d609566..8742957 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1453,7 +1453,7 @@ static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode) unsigned i; int err; - cc->tfms = kmalloc(cc->tfms_count * sizeof(struct crypto_skcipher *), + cc->tfms = kzalloc(cc->tfms_count * sizeof(struct crypto_skcipher *), GFP_KERNEL); if (!cc->tfms) return -ENOMEM; -- cgit v0.10.2 From edd1ea2a8a2549e4fe58e817d539445729491ecf Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Tue, 30 Aug 2016 22:19:11 +0530 Subject: dm bufio: remove use of deprecated create_singlethread_workqueue() The workqueue "dm_bufio_wq" queues a single work item &dm_bufio_work so it doesn't require execution ordering. Hence, alloc_workqueue() has been used to replace the deprecated create_singlethread_workqueue(). The WQ_MEM_RECLAIM flag has been set since DM requires forward progress under memory pressure. Since there are fixed number of work items, explicit concurrency limit is unnecessary here. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 6571c81..8625040 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1879,7 +1879,7 @@ static int __init dm_bufio_init(void) __cache_size_refresh(); mutex_unlock(&dm_bufio_clients_lock); - dm_bufio_wq = create_singlethread_workqueue("dm_bufio_cache"); + dm_bufio_wq = alloc_workqueue("dm_bufio_cache", WQ_MEM_RECLAIM, 0); if (!dm_bufio_wq) return -ENOMEM; -- cgit v0.10.2 From bd37e022e334757a5dc1dae41baa29e16befe4ec Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 21 Aug 2016 15:41:44 +0000 Subject: cpufreq: dt: Add terminate entry for of_device_id tables Make sure of_device_id tables are NULL terminated. Signed-off-by: Wei Yongjun Acked-by: Viresh Kumar Fixes: f56aad1d98f1 (cpufreq: dt: Add generic platform-device creation support) CC: 4.7+ # 4.7+ Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 0bb44d5..2ee40fd 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -74,6 +74,8 @@ static const struct of_device_id machines[] __initconst = { { .compatible = "ti,omap5", }, { .compatible = "xlnx,zynq-7000", }, + + { } }; static int __init cpufreq_dt_platdev_init(void) -- cgit v0.10.2 From d44c950e9398e639e124014e5872480a37b67259 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 26 Apr 2016 13:38:55 -0700 Subject: PM / runtime: Add _rcuidle suffix to allow rpm_resume() to be called from idle This commit applies another _rcuidle suffix to fix an RCU use from idle. > =============================== > [ INFO: suspicious RCU usage. ] > 4.6.0-rc5-next-20160426+ #1122 Not tainted > ------------------------------- > include/trace/events/rpm.h:69 suspicious rcu_dereference_check() usage! > > other info that might help us debug this: > > > RCU used illegally from idle CPU! > rcu_scheduler_active = 1, debug_locks = 0 > RCU used illegally from extended quiescent state! > 1 lock held by swapper/0/0: > #0: (&(&dev->power.lock)->rlock){-.-...}, at: [] __pm_runtime_resume+0x3c/0x64 > > stack backtrace: > CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1122 > Hardware name: Generic OMAP36xx (Flattened Device Tree) > [] (unwind_backtrace) from [] (show_stack+0x10/0x14) > [] (show_stack) from [] (dump_stack+0xb0/0xe4) > [] (dump_stack) from [] (rpm_resume+0x5cc/0x7f4) > [] (rpm_resume) from [] (__pm_runtime_resume+0x4c/0x64) > [] (__pm_runtime_resume) from [] (omap2_gpio_resume_after_idle+0x54/0x68) > [] (omap2_gpio_resume_after_idle) from [] (omap3_enter_idle_bm+0xfc/0x1ec) > [] (omap3_enter_idle_bm) from [] (cpuidle_enter_state+0x80/0x3d4) > [] (cpuidle_enter_state) from [] (cpu_startup_entry+0x198/0x3a0) > [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8) > [] (start_kernel) from [<8000807c>] (0x8000807c) Reported-by: Tony Lindgren Signed-off-by: Paul E. McKenney Tested-by: Tony Lindgren Tested-by: Guenter Roeck Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index e097d35..76127e1 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -601,7 +601,7 @@ static int rpm_resume(struct device *dev, int rpmflags) struct device *parent = NULL; int retval = 0; - trace_rpm_resume(dev, rpmflags); + trace_rpm_resume_rcuidle(dev, rpmflags); repeat: if (dev->power.runtime_error) @@ -764,7 +764,7 @@ static int rpm_resume(struct device *dev, int rpmflags) spin_lock_irq(&dev->power.lock); } - trace_rpm_return_int(dev, _THIS_IP_, retval); + trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval); return retval; } -- cgit v0.10.2 From d7737ce964d944dd07e25b0f569edcd550ede18c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 26 Apr 2016 13:03:51 -0700 Subject: PM / runtime: Add _rcuidle suffix to allow rpm_idle() use from idle This commit appends a few _rcuidle suffixes to fix the following RCU-used-from-idle bug: > =============================== > [ INFO: suspicious RCU usage. ] > 4.6.0-rc5-next-20160426+ #1116 Not tainted > ------------------------------- > include/trace/events/rpm.h:95 suspicious rcu_dereference_check() usage! > > other info that might help us debug this: > > > RCU used illegally from idle CPU! > rcu_scheduler_active = 1, debug_locks = 0 > RCU used illegally from extended quiescent state! > 1 lock held by swapper/0/0: > #0: (&(&dev->power.lock)->rlock){-.-...}, at: [] __rpm_callback+0x58/0x60 > > stack backtrace: > CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1116 > Hardware name: Generic OMAP36xx (Flattened Device Tree) > [] (unwind_backtrace) from [] (show_stack+0x10/0x14) > [] (show_stack) from [] (dump_stack+0xb0/0xe4) > [] (dump_stack) from [] (rpm_suspend+0x580/0x768) > [] (rpm_suspend) from [] (__pm_runtime_suspend+0x64/0x84) > [] (__pm_runtime_suspend) from [] (omap2_gpio_prepare_for_idle+0x5c/0x70) > [] (omap2_gpio_prepare_for_idle) from [] (omap_sram_idle+0x140/0x244) > [] (omap_sram_idle) from [] (omap3_enter_idle_bm+0xfc/0x1ec) > [] (omap3_enter_idle_bm) from [] (cpuidle_enter_state+0x80/0x3d4) > [] (cpuidle_enter_state) from [] (cpu_startup_entry+0x198/0x3a0) > [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8) > [] (start_kernel) from [<8000807c>] (0x8000807c) In the immortal words of Steven Rostedt, "*Whack* *Whack* *Whack*!!!" Reported-by: Tony Lindgren Signed-off-by: Paul E. McKenney Tested-by: Tony Lindgren Tested-by: Guenter Roeck WhACKED-by: Steven Rostedt Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 76127e1..17995fa 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -301,7 +301,7 @@ static int rpm_idle(struct device *dev, int rpmflags) int (*callback)(struct device *); int retval; - trace_rpm_idle(dev, rpmflags); + trace_rpm_idle_rcuidle(dev, rpmflags); retval = rpm_check_suspend_allowed(dev); if (retval < 0) ; /* Conditions are wrong. */ @@ -337,7 +337,7 @@ static int rpm_idle(struct device *dev, int rpmflags) dev->power.request_pending = true; queue_work(pm_wq, &dev->power.work); } - trace_rpm_return_int(dev, _THIS_IP_, 0); + trace_rpm_return_int_rcuidle(dev, _THIS_IP_, 0); return 0; } @@ -352,7 +352,7 @@ static int rpm_idle(struct device *dev, int rpmflags) wake_up_all(&dev->power.wait_queue); out: - trace_rpm_return_int(dev, _THIS_IP_, retval); + trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval); return retval ? retval : rpm_suspend(dev, rpmflags | RPM_AUTO); } -- cgit v0.10.2 From 279cf3f23870f7eb8ca071115e06d3d5ca0a2b9e Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Fri, 26 Aug 2016 01:00:54 +0200 Subject: drm/nouveau/acpi: use DSM if bridge does not support D3cold Even if PR3 support is available on the bridge, it will not be used if the PCI layer considers it unavailable (i.e. on all laptops from 2013 and 2014). Ensure that this condition is checked to allow a fallback to the Optimus DSM for device poweroff. Initially I wanted to call pci_d3cold_enable before checking bridge_d3 (in case the user changed d3cold_allowed), but that is such an unlikely case and likely fragile anyway. The current patch is suggested by Mika in http://www.spinics.net/lists/linux-pci/msg52599.html Cc: Mika Westerberg Signed-off-by: Peter Wu Reviewed-by: Mika Westerberg Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index f2ad17a..dc57b62 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -225,6 +225,17 @@ static bool nouveau_pr3_present(struct pci_dev *pdev) if (!parent_pdev) return false; + if (!parent_pdev->bridge_d3) { + /* + * Parent PCI bridge is currently not power managed. + * Since userspace can change these afterwards to be on + * the safe side we stick with _DSM and prevent usage of + * _PR3 from the bridge. + */ + pci_d3cold_disable(pdev); + return false; + } + parent_adev = ACPI_COMPANION(&parent_pdev->dev); if (!parent_adev) return false; -- cgit v0.10.2 From c012268b37db6b10b59dac9b7f45956cb9a8bcb2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 5 Aug 2016 14:25:53 -0700 Subject: lkdtm: Mark lkdtm_rodata_do_nothing() notrace lkdtm_rodata_do_nothing() is an empty function which is generated in order to test the non-executability of rodata. Currently if function tracing is enabled then an mcount callsite will be generated for lkdtm_rodata_do_nothing(), and it will appear in the list of available functions for function tracing (available_filter_functions). Given it's purpose purely as a test function, it seems preferable for lkdtm_rodata_do_nothing() to be marked notrace, so it doesn't appear as traceable. This also avoids triggering a linker bug on powerpc: https://sourceware.org/bugzilla/show_bug.cgi?id=20428 When the linker sees code that needs to generate a call stub, eg. a branch to mcount(), it assumes the section is executable and dereferences a NULL pointer leading to a linker segfault. Marking lkdtm_rodata_do_nothing() notrace avoids triggering the bug because the function contains no other function calls. Signed-off-by: Michael Ellerman Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/misc/lkdtm_rodata.c b/drivers/misc/lkdtm_rodata.c index 166b1db..3564477 100644 --- a/drivers/misc/lkdtm_rodata.c +++ b/drivers/misc/lkdtm_rodata.c @@ -4,7 +4,7 @@ */ #include "lkdtm.h" -void lkdtm_rodata_do_nothing(void) +void notrace lkdtm_rodata_do_nothing(void) { /* Does nothing. We just want an architecture agnostic "return". */ } -- cgit v0.10.2 From 25eb7e5c7454c9e8407c5382b9fe32d3b45fe163 Mon Sep 17 00:00:00 2001 From: Andreas Noever Date: Tue, 26 Jul 2016 18:40:37 +0200 Subject: thunderbolt: Fix resume quirk for Falcon Ridge 4C. The quirk 'quirk_apple_wait_for_thunderbolt' did not fire on Falcon Ridge 4C controllers with subdevice/subvendor set to zero. This lead to lost pci devices on system resume. Older thunderbolt controllers (pre Falcon Ridge) used the same device id for bridges and for the controller. On Apple hardware the subvendor- & subdevice-ids were set for the controller, but not for bridges. So that is what was used to differentiate between the two. Starting with Falcon Ridge bridges and controllers received different device ids. Additionally on some MacBookPro models (but not all) the subvendor/subdevice was zeroed. Starting with a42fb351c (thunderbolt: Allow loading of module on recent Apple MacBooks with thunderbolt 2 controller) the thunderbolt driver binds to all Falcon Ridge 4C controllers (irregardless of subvendor/subdevice). The corresponding quirk was not updated. This commit changes the quirk to check the device class instead of its subvendor-/subdeviceids. This works for all generations of Thunderbolt controllers. Signed-off-by: Andreas Noever Reviewed-by: Lukas Wunner Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 37ff015..6ff6469 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3328,8 +3328,7 @@ static void quirk_apple_wait_for_thunderbolt(struct pci_dev *dev) || (nhi->device != PCI_DEVICE_ID_INTEL_LIGHT_RIDGE && nhi->device != PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C && nhi->device != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI) - || nhi->subsystem_vendor != 0x2222 - || nhi->subsystem_device != 0x1111) + || nhi->class != PCI_CLASS_SYSTEM_OTHER << 8) goto out; dev_info(&dev->dev, "quirk: waiting for thunderbolt to reestablish PCI tunnels...\n"); device_pm_wait_for_dev(&dev->dev, &nhi->dev); -- cgit v0.10.2 From 82a6a81c2a38aa7a7813a0c532637877773c50ae Mon Sep 17 00:00:00 2001 From: Xavier Gnata Date: Tue, 26 Jul 2016 18:40:38 +0200 Subject: thunderbolt: Add support for INTEL_FALCON_RIDGE_2C controller. From: Xavier Gnata Add support to INTEL_FALCON_RIDGE_2C controller and corresponding quirk to support suspend/resume. Tested against 4.7 master on a MacBook Air 11" 2015. Signed-off-by: Andreas Noever Reviewed-by: Lukas Wunner Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6ff6469..44e0ff3 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3327,6 +3327,7 @@ static void quirk_apple_wait_for_thunderbolt(struct pci_dev *dev) if (nhi->vendor != PCI_VENDOR_ID_INTEL || (nhi->device != PCI_DEVICE_ID_INTEL_LIGHT_RIDGE && nhi->device != PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C && + nhi->device != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_NHI && nhi->device != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI) || nhi->class != PCI_CLASS_SYSTEM_OTHER << 8) goto out; @@ -3343,6 +3344,9 @@ DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C, quirk_apple_wait_for_thunderbolt); DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_BRIDGE, + quirk_apple_wait_for_thunderbolt); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_BRIDGE, quirk_apple_wait_for_thunderbolt); #endif diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 9c15344..a8c2041 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -651,6 +651,12 @@ static struct pci_device_id nhi_ids[] = { { .class = PCI_CLASS_SYSTEM_OTHER << 8, .class_mask = ~0, .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_NHI, + .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, + }, + { + .class = PCI_CLASS_SYSTEM_OTHER << 8, .class_mask = ~0, + .vendor = PCI_VENDOR_ID_INTEL, .device = PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, }, -- cgit v0.10.2 From ccdf3b888d87df1b914fedde91ed1848f0651c65 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 3 Aug 2016 10:44:12 +0200 Subject: thunderbolt: Don't declare Falcon Ridge unsupported Falcon Ridge 4C has been supported by the driver from the beginning, Falcon Ridge 2C support was just added. Don't irritate users with a warning declaring the opposite. Signed-off-by: Lukas Wunner Signed-off-by: Andreas Noever Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 1e116f5..9840fde 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -372,7 +372,9 @@ struct tb_switch *tb_switch_alloc(struct tb *tb, u64 route) if (sw->config.device_id != PCI_DEVICE_ID_INTEL_LIGHT_RIDGE && sw->config.device_id != PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C && - sw->config.device_id != PCI_DEVICE_ID_INTEL_PORT_RIDGE) + sw->config.device_id != PCI_DEVICE_ID_INTEL_PORT_RIDGE && + sw->config.device_id != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_BRIDGE && + sw->config.device_id != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_BRIDGE) tb_sw_warn(sw, "unsupported switch device id %#x\n", sw->config.device_id); -- cgit v0.10.2 From df6a58c5c5aa8ecb1e088ecead3fa33ae70181f1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 17 Jun 2016 17:51:17 -0400 Subject: kernfs: don't depend on d_find_any_alias() when generating notifications kernfs_notify_workfn() sends out file modified events for the scheduled kernfs_nodes. Because the modifications aren't from userland, it doesn't have the matching file struct at hand and can't use fsnotify_modify(). Instead, it looked up the inode and then used d_find_any_alias() to find the dentry and used fsnotify_parent() and fsnotify() directly to generate notifications. The assumption was that the relevant dentries would have been pinned if there are listeners, which isn't true as inotify doesn't pin dentries at all and watching the parent doesn't pin the child dentries even for dnotify. This led to, for example, inotify watchers not getting notifications if the system is under memory pressure and the matching dentries got reclaimed. It can also be triggered through /proc/sys/vm/drop_caches or a remount attempt which involves shrinking dcache. fsnotify_parent() only uses the dentry to access the parent inode, which kernfs can do easily. Update kernfs_notify_workfn() so that it uses fsnotify() directly for both the parent and target inodes without going through d_find_any_alias(). While at it, supply the target file name to fsnotify() from kernfs_node->name. Signed-off-by: Tejun Heo Reported-by: Evgeny Vereshchagin Fixes: d911d9874801 ("kernfs: make kernfs_notify() trigger inotify events too") Cc: John McCutchan Cc: Robert Love Cc: Eric Paris Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Greg Kroah-Hartman diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index e157400..2bcb86e 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -840,21 +840,35 @@ repeat: mutex_lock(&kernfs_mutex); list_for_each_entry(info, &kernfs_root(kn)->supers, node) { + struct kernfs_node *parent; struct inode *inode; - struct dentry *dentry; + /* + * We want fsnotify_modify() on @kn but as the + * modifications aren't originating from userland don't + * have the matching @file available. Look up the inodes + * and generate the events manually. + */ inode = ilookup(info->sb, kn->ino); if (!inode) continue; - dentry = d_find_any_alias(inode); - if (dentry) { - fsnotify_parent(NULL, dentry, FS_MODIFY); - fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE, - NULL, 0); - dput(dentry); + parent = kernfs_get_parent(kn); + if (parent) { + struct inode *p_inode; + + p_inode = ilookup(info->sb, parent->ino); + if (p_inode) { + fsnotify(p_inode, FS_MODIFY | FS_EVENT_ON_CHILD, + inode, FSNOTIFY_EVENT_INODE, kn->name, 0); + iput(p_inode); + } + + kernfs_put(parent); } + fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE, + kn->name, 0); iput(inode); } -- cgit v0.10.2 From 96b0af4b729cabd44e237c5a6b9bd4e0ea4ed457 Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Fri, 20 May 2016 14:23:38 +0200 Subject: documentation: drivers/core/of: fix name of of_node symlink commit 5590f3196b29 ("drivers/core/of: Add symlink to device-tree from devices with an OF node") added a symlink called "of_node" to sysfs however the documentation describes it as "of_path". Fix the documentation to match what the code actually does. Signed-off-by: Martin Fuzzey Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman diff --git a/Documentation/ABI/stable/sysfs-devices b/Documentation/ABI/stable/sysfs-devices index 43f78b88d..df449d7 100644 --- a/Documentation/ABI/stable/sysfs-devices +++ b/Documentation/ABI/stable/sysfs-devices @@ -1,7 +1,7 @@ # Note: This documents additional properties of any device beyond what # is documented in Documentation/sysfs-rules.txt -What: /sys/devices/*/of_path +What: /sys/devices/*/of_node Date: February 2015 Contact: Device Tree mailing list Description: -- cgit v0.10.2 From 17d0774f80681020eccc9638d925a23f1fc4f671 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 22 Jun 2016 21:42:16 +0300 Subject: sysfs: correctly handle read offset on PREALLOC attrs Attributes declared with __ATTR_PREALLOC use sysfs_kf_read() which returns zero bytes for non-zero offset. This breaks script checkarray in mdadm tool in debian where /bin/sh is 'dash' because its builtin 'read' reads only one byte at a time. Script gets 'i' instead of 'idle' when reads current action from /sys/block/$dev/md/sync_action and as a result does nothing. This patch adds trivial implementation of partial read: generate whole string and move required part into buffer head. Signed-off-by: Konstantin Khlebnikov Fixes: 4ef67a8c95f3 ("sysfs/kernfs: make read requests on pre-alloc files use the buffer.") Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=787950 Cc: Stable # v3.19+ Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index f35523d..b803213 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -114,9 +114,15 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf, * If buf != of->prealloc_buf, we don't know how * large it is, so cannot safely pass it to ->show */ - if (pos || WARN_ON_ONCE(buf != of->prealloc_buf)) + if (WARN_ON_ONCE(buf != of->prealloc_buf)) return 0; len = ops->show(kobj, of->kn->priv, buf); + if (pos) { + if (len <= pos) + return 0; + len -= pos; + memmove(buf, buf + pos, len); + } return min(count, len); } -- cgit v0.10.2 From 5db4f7f80d165fc9725f356e99feec409e446baa Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 16 Aug 2016 15:06:54 +0300 Subject: Revert "tty/serial/8250: use mctrl_gpio helpers" Serial console is broken in v4.8-rcX. Mika and I independently bisected down to commit 4ef03d328769 ("tty/serial/8250: use mctrl_gpio helpers"). Since neither author nor anyone else didn't propose a solution we better revert it for now. This reverts commit 4ef03d328769eddbfeca1f1c958fdb181a69c341. Link: https://lkml.kernel.org/r/20160809130229.GN1729@lahna.fi.intel.com Signed-off-by: Andy Shevchenko Tested-by: Heikki Krogerus Tested-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman diff --git a/Documentation/devicetree/bindings/serial/8250.txt b/Documentation/devicetree/bindings/serial/8250.txt index f5561ac..936ab5b 100644 --- a/Documentation/devicetree/bindings/serial/8250.txt +++ b/Documentation/devicetree/bindings/serial/8250.txt @@ -42,9 +42,6 @@ Optional properties: - auto-flow-control: one way to enable automatic flow control support. The driver is allowed to detect support for the capability even without this property. -- {rts,cts,dtr,dsr,rng,dcd}-gpios: specify a GPIO for RTS/CTS/DTR/DSR/RI/DCD - line respectively. It will use specified GPIO instead of the peripheral - function pin for the UART feature. If unsure, don't specify this property. Note: * fsl,ns16550: @@ -66,19 +63,3 @@ Example: interrupts = <10>; reg-shift = <2>; }; - -Example for OMAP UART using GPIO-based modem control signals: - - uart4: serial@49042000 { - compatible = "ti,omap3-uart"; - reg = <0x49042000 0x400>; - interrupts = <80>; - ti,hwmods = "uart4"; - clock-frequency = <48000000>; - cts-gpios = <&gpio3 5 GPIO_ACTIVE_LOW>; - rts-gpios = <&gpio3 6 GPIO_ACTIVE_LOW>; - dtr-gpios = <&gpio1 12 GPIO_ACTIVE_LOW>; - dsr-gpios = <&gpio1 13 GPIO_ACTIVE_LOW>; - dcd-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>; - rng-gpios = <&gpio1 15 GPIO_ACTIVE_LOW>; - }; diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 122e0e4..1a16fea 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -15,8 +15,6 @@ #include #include -#include "../serial_mctrl_gpio.h" - struct uart_8250_dma { int (*tx_dma)(struct uart_8250_port *p); int (*rx_dma)(struct uart_8250_port *p); @@ -133,43 +131,12 @@ void serial8250_em485_destroy(struct uart_8250_port *p); static inline void serial8250_out_MCR(struct uart_8250_port *up, int value) { - int mctrl_gpio = 0; - serial_out(up, UART_MCR, value); - - if (value & UART_MCR_RTS) - mctrl_gpio |= TIOCM_RTS; - if (value & UART_MCR_DTR) - mctrl_gpio |= TIOCM_DTR; - - mctrl_gpio_set(up->gpios, mctrl_gpio); } static inline int serial8250_in_MCR(struct uart_8250_port *up) { - int mctrl, mctrl_gpio = 0; - - mctrl = serial_in(up, UART_MCR); - - /* save current MCR values */ - if (mctrl & UART_MCR_RTS) - mctrl_gpio |= TIOCM_RTS; - if (mctrl & UART_MCR_DTR) - mctrl_gpio |= TIOCM_DTR; - - mctrl_gpio = mctrl_gpio_get_outputs(up->gpios, &mctrl_gpio); - - if (mctrl_gpio & TIOCM_RTS) - mctrl |= UART_MCR_RTS; - else - mctrl &= ~UART_MCR_RTS; - - if (mctrl_gpio & TIOCM_DTR) - mctrl |= UART_MCR_DTR; - else - mctrl &= ~UART_MCR_DTR; - - return mctrl; + return serial_in(up, UART_MCR); } #if defined(__alpha__) && !defined(CONFIG_PCI) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 13ad5c3..dcf43f6 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -974,8 +974,6 @@ int serial8250_register_8250_port(struct uart_8250_port *up) uart = serial8250_find_match_or_unused(&up->port); if (uart && uart->port.type != PORT_8250_CIR) { - struct mctrl_gpios *gpios; - if (uart->port.dev) uart_remove_one_port(&serial8250_reg, &uart->port); @@ -1013,13 +1011,6 @@ int serial8250_register_8250_port(struct uart_8250_port *up) if (up->port.flags & UPF_FIXED_TYPE) uart->port.type = up->port.type; - gpios = mctrl_gpio_init(&uart->port, 0); - if (IS_ERR(gpios)) { - if (PTR_ERR(gpios) != -ENOSYS) - return PTR_ERR(gpios); - } else - uart->gpios = gpios; - serial8250_set_defaults(uart); /* Possibly override default I/O functions. */ diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index e14982f..61ad6c3 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -134,21 +134,18 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) serial8250_do_set_mctrl(port, mctrl); - if (IS_ERR_OR_NULL(mctrl_gpio_to_gpiod(up->gpios, - UART_GPIO_RTS))) { - /* - * Turn off autoRTS if RTS is lowered and restore autoRTS - * setting if RTS is raised - */ - lcr = serial_in(up, UART_LCR); - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - if ((mctrl & TIOCM_RTS) && (port->status & UPSTAT_AUTORTS)) - priv->efr |= UART_EFR_RTS; - else - priv->efr &= ~UART_EFR_RTS; - serial_out(up, UART_EFR, priv->efr); - serial_out(up, UART_LCR, lcr); - } + /* + * Turn off autoRTS if RTS is lowered and restore autoRTS setting + * if RTS is raised + */ + lcr = serial_in(up, UART_LCR); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + if ((mctrl & TIOCM_RTS) && (port->status & UPSTAT_AUTORTS)) + priv->efr |= UART_EFR_RTS; + else + priv->efr &= ~UART_EFR_RTS; + serial_out(up, UART_EFR, priv->efr); + serial_out(up, UART_LCR, lcr); } /* @@ -449,9 +446,7 @@ static void omap_8250_set_termios(struct uart_port *port, priv->efr = 0; up->port.status &= ~(UPSTAT_AUTOCTS | UPSTAT_AUTORTS | UPSTAT_AUTOXOFF); - if (termios->c_cflag & CRTSCTS && up->port.flags & UPF_HARD_FLOW - && IS_ERR_OR_NULL(mctrl_gpio_to_gpiod(up->gpios, - UART_GPIO_RTS))) { + if (termios->c_cflag & CRTSCTS && up->port.flags & UPF_HARD_FLOW) { /* Enable AUTOCTS (autoRTS is enabled when RTS is raised) */ up->port.status |= UPSTAT_AUTOCTS | UPSTAT_AUTORTS; priv->efr |= UART_EFR_CTS; diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 7481b95..bdfa659 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1618,8 +1618,6 @@ static void serial8250_disable_ms(struct uart_port *port) if (up->bugs & UART_BUG_NOMSR) return; - mctrl_gpio_disable_ms(up->gpios); - up->ier &= ~UART_IER_MSI; serial_port_out(port, UART_IER, up->ier); } @@ -1632,8 +1630,6 @@ static void serial8250_enable_ms(struct uart_port *port) if (up->bugs & UART_BUG_NOMSR) return; - mctrl_gpio_enable_ms(up->gpios); - up->ier |= UART_IER_MSI; serial8250_rpm_get(up); @@ -1917,8 +1913,7 @@ unsigned int serial8250_do_get_mctrl(struct uart_port *port) ret |= TIOCM_DSR; if (status & UART_MSR_CTS) ret |= TIOCM_CTS; - - return mctrl_gpio_get(up->gpios, &ret); + return ret; } EXPORT_SYMBOL_GPL(serial8250_do_get_mctrl); diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index c9ec839..7c6f7af 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -6,7 +6,6 @@ config SERIAL_8250 tristate "8250/16550 and compatible serial support" select SERIAL_CORE - select SERIAL_MCTRL_GPIO if GPIOLIB ---help--- This selects whether you want to include the driver for the standard serial ports. The standard answer is Y. People who might say N diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 923266c..48ec765 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -111,7 +111,6 @@ struct uart_8250_port { * if no_console_suspend */ unsigned char probe; - struct mctrl_gpios *gpios; #define UART_PROBE_RSA (1 << 0) /* -- cgit v0.10.2 From 47b34d2ef266e2c283b514d65c8963c2ccd42474 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Jul 2016 17:21:49 +0300 Subject: serial: 8250_mid: fix divide error bug if baud rate is 0 Since the commit c1a67b48f6a5 ("serial: 8250_pci: replace switch-case by formula for Intel MID"), the 8250 driver crashes in the byt_set_termios() function with a divide error. This is caused by the fact that a baud rate of 0 (B0) is not handled properly. Fix it by falling back to B9600 in this case. Reported-by: "Mendez Salinas, Fernando" Fixes: c1a67b48f6a5 ("serial: 8250_pci: replace switch-case by formula for Intel MID") Cc: stable@vger.kernel.org Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/tty/serial/8250/8250_mid.c b/drivers/tty/serial/8250/8250_mid.c index 339de9c..20c5db2 100644 --- a/drivers/tty/serial/8250/8250_mid.c +++ b/drivers/tty/serial/8250/8250_mid.c @@ -168,6 +168,9 @@ static void mid8250_set_termios(struct uart_port *p, unsigned long w = BIT(24) - 1; unsigned long mul, div; + /* Gracefully handle the B0 case: fall back to B9600 */ + fuart = fuart ? fuart : 9600 * 16; + if (mid->board->freq < fuart) { /* Find prescaler value that satisfies Fuart < Fref */ if (mid->board->freq > baud) -- cgit v0.10.2 From c8d192428f52f244130b84650ad616df09f2b1e1 Mon Sep 17 00:00:00 2001 From: Jimi Damon Date: Wed, 20 Jul 2016 17:00:40 -0700 Subject: serial: 8250: added acces i/o products quad and octal serial cards Added devices ids for acces i/o products quad and octal serial cards that make use of existing Pericom PI7C9X7954 and PI7C9X7958 configurations . Signed-off-by: Jimi Damon Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 20ebaea..bc51b32 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1950,6 +1950,43 @@ pci_wch_ch38x_setup(struct serial_private *priv, #define PCI_DEVICE_ID_PERICOM_PI7C9X7954 0x7954 #define PCI_DEVICE_ID_PERICOM_PI7C9X7958 0x7958 +#define PCI_VENDOR_ID_ACCESIO 0x494f +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SDB 0x1051 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2S 0x1053 +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SDB 0x105C +#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4S 0x105E +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_2DB 0x1091 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_2 0x1093 +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4DB 0x1099 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_4 0x109B +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SMDB 0x10D1 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2SM 0x10D3 +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SMDB 0x10DA +#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4SM 0x10DC +#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_1 0x1108 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_2 0x1110 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_2 0x1111 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_4 0x1118 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_4 0x1119 +#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2S 0x1152 +#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S 0x115A +#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_2 0x1190 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_2 0x1191 +#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4 0x1198 +#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4 0x1199 +#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2SM 0x11D0 +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM422_4 0x105A +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM485_4 0x105B +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM422_8 0x106A +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM485_8 0x106B +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4 0x1098 +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_8 0x10A9 +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SM 0x10D9 +#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_8SM 0x10E9 +#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4SM 0x11D8 + + + /* Unknown vendors/cards - this should not be in linux/pci_ids.h */ #define PCI_SUBDEVICE_ID_UNKNOWN_0x1584 0x1584 #define PCI_SUBDEVICE_ID_UNKNOWN_0x1588 0x1588 @@ -5113,6 +5150,108 @@ static struct pci_device_id serial_pci_tbl[] = { 0, 0, pbn_pericom_PI7C9X7958 }, /* + * ACCES I/O Products quad + */ + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SDB, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2S, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SDB, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4S, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_2DB, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4DB, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SMDB, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2SM, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SMDB, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4SM, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_1, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2S, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2SM, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7954 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM422_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM485_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM422_8, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM485_8, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_8, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SM, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_8SM, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + { PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4SM, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pericom_PI7C9X7958 }, + /* * Topic TP560 Data/Fax/Voice 56k modem (reported by Evan Clarke) */ { PCI_VENDOR_ID_TOPIC, PCI_DEVICE_ID_TOPIC_TP560, -- cgit v0.10.2 From 9f834ec18defc369d73ccf9e87a2790bfa05bf46 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 22 Aug 2016 16:41:46 -0700 Subject: binfmt_elf: switch to new creds when switching to new mm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to delay switching to the new credentials until after we had mapped the executable (and possible elf interpreter). That was kind of odd to begin with, since the new executable will actually then _run_ with the new creds, but whatever. The bigger problem was that we also want to make sure that we turn off prof events and tracing before we start mapping the new executable state. So while this is a cleanup, it's also a fix for a possible information leak. Reported-by: Robert Święcki Tested-by: Peter Zijlstra Acked-by: David Howells Acked-by: Oleg Nesterov Acked-by: Andy Lutomirski Acked-by: Eric W. Biederman Cc: Willy Tarreau Cc: Kees Cook Cc: Al Viro Signed-off-by: Linus Torvalds diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7f6aff3f..e5495f3 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -853,6 +853,7 @@ static int load_elf_binary(struct linux_binprm *bprm) current->flags |= PF_RANDOMIZE; setup_new_exec(bprm); + install_exec_creds(bprm); /* Do this so that we can load the interpreter, if need be. We will change some of these later */ @@ -1044,7 +1045,6 @@ static int load_elf_binary(struct linux_binprm *bprm) goto out; #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ - install_exec_creds(bprm); retval = create_elf_tables(bprm, &loc->elf_ex, load_addr, interp_load_addr); if (retval < 0) -- cgit v0.10.2 From 10ea9434065e56fe14287f89258ecf2fb684ed1a Mon Sep 17 00:00:00 2001 From: jimqu Date: Tue, 30 Aug 2016 08:59:42 +0800 Subject: drm/amd/amdgpu: sdma resume fail during S4 on CI SDMA could be fail in the thaw() and restore() processes, do software reset if each SDMA engine is busy. Signed-off-by: JimQu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index ee64669..77fdd99 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -52,6 +52,7 @@ static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev); static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev); static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev); static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev); +static int cik_sdma_soft_reset(void *handle); MODULE_FIRMWARE("radeon/bonaire_sdma.bin"); MODULE_FIRMWARE("radeon/bonaire_sdma1.bin"); @@ -1037,6 +1038,8 @@ static int cik_sdma_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + cik_sdma_soft_reset(handle); + return cik_sdma_hw_init(adev); } -- cgit v0.10.2 From 53960b4f89db58bc155d6f8aa0a44ccc59ccb26f Mon Sep 17 00:00:00 2001 From: jimqu Date: Tue, 30 Aug 2016 09:03:16 +0800 Subject: drm/amd/amdgpu: compute ring test fail during S4 on CI unhalt Instrction Fetch Unit after all rings are inited. Signed-off-by: JimQu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index d869d05..425413f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2755,8 +2755,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) u64 wb_gpu_addr; u32 *buf; struct bonaire_mqd *mqd; - - gfx_v7_0_cp_compute_enable(adev, true); + struct amdgpu_ring *ring; /* fix up chicken bits */ tmp = RREG32(mmCP_CPF_DEBUG); @@ -2791,7 +2790,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) /* init the queues. Just two for now. */ for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + ring = &adev->gfx.compute_ring[i]; if (ring->mqd_obj == NULL) { r = amdgpu_bo_create(adev, @@ -2970,6 +2969,13 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) amdgpu_bo_unreserve(ring->mqd_obj); ring->ready = true; + } + + gfx_v7_0_cp_compute_enable(adev, true); + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + r = amdgpu_ring_test_ring(ring); if (r) ring->ready = false; -- cgit v0.10.2 From 1f703e6679f373f5bba4efe7093aa82e91af4037 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 30 Aug 2016 17:59:11 +0800 Subject: drm/amdgpu: record error code when ring test failed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we may miss errors. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index a31d7ef..ec1282a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -280,7 +280,7 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev) int amdgpu_ib_ring_tests(struct amdgpu_device *adev) { unsigned i; - int r; + int r, ret = 0; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -301,10 +301,11 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) } else { /* still not good, but we can live with it */ DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r); + ret = r; } } } - return 0; + return ret; } /* -- cgit v0.10.2 From cd81a9170e69e018bbaba547c1fd85a585f5697a Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 23 Aug 2016 16:20:38 +0200 Subject: mm: introduce get_task_exe_file For more convenient access if one has a pointer to the task. As a minor nit take advantage of the fact that only task lock + rcu are needed to safely grab ->exe_file. This saves mm refcount dance. Use the helper in proc_exe_link. Signed-off-by: Mateusz Guzik Acked-by: Konstantin Khlebnikov Acked-by: Richard Guy Briggs Cc: # 4.3.x Signed-off-by: Paul Moore diff --git a/fs/proc/base.c b/fs/proc/base.c index 0d163a8..da8b194 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1552,18 +1552,13 @@ static const struct file_operations proc_pid_set_comm_operations = { static int proc_exe_link(struct dentry *dentry, struct path *exe_path) { struct task_struct *task; - struct mm_struct *mm; struct file *exe_file; task = get_proc_task(d_inode(dentry)); if (!task) return -ENOENT; - mm = get_task_mm(task); + exe_file = get_task_exe_file(task); put_task_struct(task); - if (!mm) - return -ENOENT; - exe_file = get_mm_exe_file(mm); - mmput(mm); if (exe_file) { *exe_path = exe_file->f_path; path_get(&exe_file->f_path); diff --git a/include/linux/mm.h b/include/linux/mm.h index 8f468e0..004c73a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1987,6 +1987,7 @@ extern void mm_drop_all_locks(struct mm_struct *mm); extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); extern struct file *get_mm_exe_file(struct mm_struct *mm); +extern struct file *get_task_exe_file(struct task_struct *task); extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages); extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages); diff --git a/kernel/fork.c b/kernel/fork.c index d277e83..42451ae 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -774,6 +774,29 @@ struct file *get_mm_exe_file(struct mm_struct *mm) EXPORT_SYMBOL(get_mm_exe_file); /** + * get_task_exe_file - acquire a reference to the task's executable file + * + * Returns %NULL if task's mm (if any) has no associated executable file or + * this is a kernel thread with borrowed mm (see the comment above get_task_mm). + * User must release file via fput(). + */ +struct file *get_task_exe_file(struct task_struct *task) +{ + struct file *exe_file = NULL; + struct mm_struct *mm; + + task_lock(task); + mm = task->mm; + if (mm) { + if (!(task->flags & PF_KTHREAD)) + exe_file = get_mm_exe_file(mm); + } + task_unlock(task); + return exe_file; +} +EXPORT_SYMBOL(get_task_exe_file); + +/** * get_task_mm - acquire a reference to the task's mm * * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning -- cgit v0.10.2 From 5efc244346f9f338765da3d592f7947b0afdc4b5 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 23 Aug 2016 16:20:39 +0200 Subject: audit: fix exe_file access in audit_exe_compare Prior to the change the function would blindly deference mm, exe_file and exe_file->f_inode, each of which could have been NULL or freed. Use get_task_exe_file to safely obtain stable exe_file. Signed-off-by: Mateusz Guzik Acked-by: Konstantin Khlebnikov Acked-by: Richard Guy Briggs Cc: # 4.3.x Signed-off-by: Paul Moore diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 3cf1c59..4846691 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -19,6 +19,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include @@ -544,10 +545,11 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark) unsigned long ino; dev_t dev; - rcu_read_lock(); - exe_file = rcu_dereference(tsk->mm->exe_file); + exe_file = get_task_exe_file(tsk); + if (!exe_file) + return 0; ino = exe_file->f_inode->i_ino; dev = exe_file->f_inode->i_sb->s_dev; - rcu_read_unlock(); + fput(exe_file); return audit_mark_compare(mark, ino, dev); } -- cgit v0.10.2 From 38b256973ea90fc7c2b7e1b734fa0e8b83538d50 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:11:59 +0200 Subject: ovl: handle umask and posix_acl_default correctly on creation Setting MS_POSIXACL in sb->s_flags has the side effect of passing mode to create functions without masking against umask. Another problem when creating over a whiteout is that the default posix acl is not inherited from the parent dir (because the real parent dir at the time of creation is the work directory). Fix these problems by: a) If upper fs does not have MS_POSIXACL, then mask mode with umask. b) If creating over a whiteout, call posix_acl_create() to get the inherited acls. After creation (but before moving to the final destination) set these acls on the created file. posix_acl_create() also updates the file creation mode as appropriate. Fixes: 39a25b2b3762 ("ovl: define ->get_acl() for overlay inodes") Signed-off-by: Miklos Szeredi diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 12bcd07..f485dd4 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include "overlayfs.h" void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) @@ -186,6 +188,9 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct dentry *newdentry; int err; + if (!hardlink && !IS_POSIXACL(udir)) + stat->mode &= ~current_umask(); + inode_lock_nested(udir, I_MUTEX_PARENT); newdentry = lookup_one_len(dentry->d_name.name, upperdir, dentry->d_name.len); @@ -335,6 +340,32 @@ out_free: return ret; } +static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name, + const struct posix_acl *acl) +{ + void *buffer; + size_t size; + int err; + + if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl) + return 0; + + size = posix_acl_to_xattr(NULL, acl, NULL, 0); + buffer = kmalloc(size, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); + err = size; + if (err < 0) + goto out_free; + + err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE); +out_free: + kfree(buffer); + return err; +} + static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, struct kstat *stat, const char *link, struct dentry *hardlink) @@ -346,10 +377,18 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, struct dentry *upper; struct dentry *newdentry; int err; + struct posix_acl *acl, *default_acl; if (WARN_ON(!workdir)) return -EROFS; + if (!hardlink) { + err = posix_acl_create(dentry->d_parent->d_inode, + &stat->mode, &default_acl, &acl); + if (err) + return err; + } + err = ovl_lock_rename_workdir(workdir, upperdir); if (err) goto out; @@ -384,6 +423,17 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (err) goto out_cleanup; } + if (!hardlink) { + err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS, + acl); + if (err) + goto out_cleanup; + + err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT, + default_acl); + if (err) + goto out_cleanup; + } if (!hardlink && S_ISDIR(stat->mode)) { err = ovl_set_opaque(newdentry); @@ -410,6 +460,10 @@ out_dput: out_unlock: unlock_rename(workdir, upperdir); out: + if (!hardlink) { + posix_acl_release(acl); + posix_acl_release(default_acl); + } return err; out_cleanup: -- cgit v0.10.2 From c11b9fdd6a612f376a5e886505f1c54c16d8c380 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:11:59 +0200 Subject: ovl: remove posix_acl_default from workdir Clear out posix acl xattrs on workdir and also reset the mode after creation so that an inherited sgid bit is cleared. Signed-off-by: Miklos Szeredi Cc: diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 4036132..452fb71 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -814,6 +814,10 @@ retry: struct kstat stat = { .mode = S_IFDIR | 0, }; + struct iattr attr = { + .ia_valid = ATTR_MODE, + .ia_mode = stat.mode, + }; if (work->d_inode) { err = -EEXIST; @@ -829,6 +833,21 @@ retry: err = ovl_create_real(dir, work, &stat, NULL, NULL, true); if (err) goto out_dput; + + err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); + if (err && err != -ENODATA) + goto out_dput; + + err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); + if (err && err != -ENODATA) + goto out_dput; + + /* Clear any inherited mode bits */ + inode_lock(work->d_inode); + err = notify_change(work, &attr, NULL); + inode_unlock(work->d_inode); + if (err) + goto out_dput; } out_unlock: inode_unlock(dir); -- cgit v0.10.2 From eea2fb4851e9dcbab6b991aaf47e2e024f1f55a0 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:11:59 +0200 Subject: ovl: proper cleanup of workdir When mounting overlayfs it needs a clean "work" directory under the supplied workdir. Previously the mount code removed this directory if it already existed and created a new one. If the removal failed (e.g. directory was not empty) then it fell back to a read-only mount not using the workdir. While this has never been reported, it is possible to get a non-empty "work" dir from a previous mount of overlayfs in case of crash in the middle of an operation using the work directory. In this case the left over state should be discarded and the overlay filesystem will be consistent, guaranteed by the atomicity of operations on moving to/from the workdir to the upper layer. This patch implements cleaning out any files left in workdir. It is implemented using real recursion for simplicity, but the depth is limited to 2, because the worst case is that of a directory containing whiteouts under "work". Signed-off-by: Miklos Szeredi Cc: diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 34839bd..9a95e2c 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -179,6 +179,8 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list); void ovl_cache_free(struct list_head *list); int ovl_check_d_type_supported(struct path *realpath); +void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, + struct dentry *dentry, int level); /* inode.c */ int ovl_setattr(struct dentry *dentry, struct iattr *attr); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index cf37fc7..f241b4e 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -248,7 +248,7 @@ static inline int ovl_dir_read(struct path *realpath, err = rdd->err; } while (!err && rdd->count); - if (!err && rdd->first_maybe_whiteout) + if (!err && rdd->first_maybe_whiteout && rdd->dentry) err = ovl_check_whiteouts(realpath->dentry, rdd); fput(realfile); @@ -606,3 +606,64 @@ int ovl_check_d_type_supported(struct path *realpath) return rdd.d_type_supported; } + +static void ovl_workdir_cleanup_recurse(struct path *path, int level) +{ + int err; + struct inode *dir = path->dentry->d_inode; + LIST_HEAD(list); + struct ovl_cache_entry *p; + struct ovl_readdir_data rdd = { + .ctx.actor = ovl_fill_merge, + .dentry = NULL, + .list = &list, + .root = RB_ROOT, + .is_lowest = false, + }; + + err = ovl_dir_read(path, &rdd); + if (err) + goto out; + + inode_lock_nested(dir, I_MUTEX_PARENT); + list_for_each_entry(p, &list, l_node) { + struct dentry *dentry; + + if (p->name[0] == '.') { + if (p->len == 1) + continue; + if (p->len == 2 && p->name[1] == '.') + continue; + } + dentry = lookup_one_len(p->name, path->dentry, p->len); + if (IS_ERR(dentry)) + continue; + if (dentry->d_inode) + ovl_workdir_cleanup(dir, path->mnt, dentry, level); + dput(dentry); + } + inode_unlock(dir); +out: + ovl_cache_free(&list); +} + +void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, + struct dentry *dentry, int level) +{ + int err; + + if (!d_is_dir(dentry) || level > 1) { + ovl_cleanup(dir, dentry); + return; + } + + err = ovl_do_rmdir(dir, dentry); + if (err) { + struct path path = { .mnt = mnt, .dentry = dentry }; + + inode_unlock(dir); + ovl_workdir_cleanup_recurse(&path, level + 1); + inode_lock_nested(dir, I_MUTEX_PARENT); + ovl_cleanup(dir, dentry); + } +} diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 452fb71..219534e 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -825,7 +825,7 @@ retry: goto out_dput; retried = true; - ovl_cleanup(dir, work); + ovl_workdir_cleanup(dir, mnt, work, 0); dput(work); goto retry; } -- cgit v0.10.2 From 5201dc449e4b6b6d7e92f7f974269b11681f98b5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:11:59 +0200 Subject: ovl: use cached acl on underlying layer Instead of calling ->get_acl() directly, use get_acl() to get the cached value. We will have the acl cached on the underlying inode anyway, because we do permission checking on the both the overlay and the underlying fs. So, since we already have double caching, this improves performance without any cost. Signed-off-by: Miklos Szeredi diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 024352f..d50d1ea 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "overlayfs.h" static int ovl_copy_up_truncate(struct dentry *dentry) @@ -314,14 +315,14 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type) const struct cred *old_cred; struct posix_acl *acl; - if (!IS_POSIXACL(realinode)) + if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) return NULL; if (!realinode->i_op->get_acl) return NULL; old_cred = ovl_override_creds(inode->i_sb); - acl = realinode->i_op->get_acl(realinode, type); + acl = get_acl(realinode, type); revert_creds(old_cred); return acl; -- cgit v0.10.2 From 2a3a2a3f35249412e35fbb48b743348c40373409 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:11:59 +0200 Subject: ovl: don't cache acl on overlay layer Some operations (setxattr/chmod) can make the cached acl stale. We either need to clear overlay's acl cache for the affected inode or prevent acl caching on the overlay altogether. Preventing caching has the following advantages: - no double caching, less memory used - overlay cache doesn't go stale when fs clears it's own cache Possible disadvantage is performance loss. If that becomes a problem get_acl() can be optimized for overlayfs. This patch disables caching by pre setting i_*acl to a value that - has bit 0 set, so is_uncached_acl() will return true - is not equal to ACL_NOT_CACHED, so get_acl() will not overwrite it The constant -3 was chosen for this purpose. Fixes: 39a25b2b3762 ("ovl: define ->get_acl() for overlay inodes") Signed-off-by: Miklos Szeredi diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index d50d1ea..47a4f33 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -416,6 +416,9 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode) inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_flags |= S_NOCMTIME; +#ifdef CONFIG_FS_POSIX_ACL + inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; +#endif mode &= S_IFMT; switch (mode) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 3523bf6..901e25d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -574,6 +574,7 @@ static inline void mapping_allow_writable(struct address_space *mapping) struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) +#define ACL_DONT_CACHE ((void *)(-3)) static inline struct posix_acl * uncached_acl_sentinel(struct task_struct *task) -- cgit v0.10.2 From fd36570a8805f39b40a0ebde19b08603aa201d17 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 18 Aug 2016 16:58:35 +0100 Subject: ovl: fix spelling mistake: "directries" -> "directories" Trivial fix to spelling mistake in pr_err message. Signed-off-by: Colin Ian King Signed-off-by: Miklos Szeredi diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 219534e..6aad7d4 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1151,7 +1151,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) err = -EINVAL; stacklen = ovl_split_lowerdirs(lowertmp); if (stacklen > OVL_MAX_STACK) { - pr_err("overlayfs: too many lower directries, limit is %d\n", + pr_err("overlayfs: too many lower directories, limit is %d\n", OVL_MAX_STACK); goto out_free_lowertmp; } else if (!ufs->config.upperdir && stacklen == 1) { -- cgit v0.10.2 From fe2b75952347762a21f67d9df1199137ae5988b2 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 22 Aug 2016 17:59:22 +0200 Subject: ovl: Fix OVL_XATTR_PREFIX Make sure ovl_own_xattr_handler only matches attribute names starting with "overlay.", not "overlayXXX". Signed-off-by: Andreas Gruenbacher Fixes: d837a49bd57f ("ovl: fix POSIX ACL setting") Signed-off-by: Miklos Szeredi diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 47a4f33..f523511 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -194,9 +194,8 @@ static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) bool ovl_is_private_xattr(const char *name) { -#define OVL_XATTR_PRE_NAME OVL_XATTR_PREFIX "." - return strncmp(name, OVL_XATTR_PRE_NAME, - sizeof(OVL_XATTR_PRE_NAME) - 1) == 0; + return strncmp(name, OVL_XATTR_PREFIX, + sizeof(OVL_XATTR_PREFIX) - 1) == 0; } int ovl_setxattr(struct dentry *dentry, struct inode *inode, diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 9a95e2c..f50c390 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -24,8 +24,8 @@ enum ovl_path_type { (OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type)) -#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay" -#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX ".opaque" +#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay." +#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque" #define OVL_ISUPPER_MASK 1UL -- cgit v0.10.2 From 0c97be22f928b85110504c4bbb8574facb4bd0c0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 22 Aug 2016 16:36:49 +0200 Subject: ovl: Get rid of ovl_xattr_noacl_handlers array Use an ordinary #ifdef to conditionally include the POSIX ACL handlers in ovl_xattr_handlers, like the other filesystems do. Flag the code that is now only used conditionally with __maybe_unused. Signed-off-by: Andreas Gruenbacher Signed-off-by: Miklos Szeredi diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 6aad7d4..c356191 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -986,10 +986,11 @@ static unsigned int ovl_split_lowerdirs(char *str) return ctr; } -static int ovl_posix_acl_xattr_set(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) +static int __maybe_unused +ovl_posix_acl_xattr_set(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) { struct dentry *workdir = ovl_workdir(dentry); struct inode *realinode = ovl_inode_real(inode, NULL); @@ -1040,13 +1041,15 @@ static int ovl_own_xattr_set(const struct xattr_handler *handler, return -EPERM; } -static const struct xattr_handler ovl_posix_acl_access_xattr_handler = { +static const struct xattr_handler __maybe_unused +ovl_posix_acl_access_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_ACCESS, .flags = ACL_TYPE_ACCESS, .set = ovl_posix_acl_xattr_set, }; -static const struct xattr_handler ovl_posix_acl_default_xattr_handler = { +static const struct xattr_handler __maybe_unused +ovl_posix_acl_default_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_DEFAULT, .flags = ACL_TYPE_DEFAULT, .set = ovl_posix_acl_xattr_set, @@ -1063,19 +1066,15 @@ static const struct xattr_handler ovl_other_xattr_handler = { }; static const struct xattr_handler *ovl_xattr_handlers[] = { +#ifdef CONFIG_FS_POSIX_ACL &ovl_posix_acl_access_xattr_handler, &ovl_posix_acl_default_xattr_handler, +#endif &ovl_own_xattr_handler, &ovl_other_xattr_handler, NULL }; -static const struct xattr_handler *ovl_xattr_noacl_handlers[] = { - &ovl_own_xattr_handler, - &ovl_other_xattr_handler, - NULL, -}; - static int ovl_fill_super(struct super_block *sb, void *data, int silent) { struct path upperpath = { NULL, NULL }; @@ -1288,10 +1287,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = OVERLAYFS_SUPER_MAGIC; sb->s_op = &ovl_super_operations; - if (IS_ENABLED(CONFIG_FS_POSIX_ACL)) - sb->s_xattr = ovl_xattr_handlers; - else - sb->s_xattr = ovl_xattr_noacl_handlers; + sb->s_xattr = ovl_xattr_handlers; sb->s_root = root_dentry; sb->s_fs_info = ufs; sb->s_flags |= MS_POSIXACL; -- cgit v0.10.2 From 0e585ccc13b3edbb187fb4f1b7cc9397f17d64a9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 22 Aug 2016 17:22:11 +0200 Subject: ovl: Switch to generic_removexattr Commit d837a49bd57f ("ovl: fix POSIX ACL setting") switches from iop->setxattr from ovl_setxattr to generic_setxattr, so switch from ovl_removexattr to generic_removexattr as well. As far as permission checking goes, the same rules should apply in either case. While doing that, rename ovl_setxattr to ovl_xattr_set to indicate that this is not an iop->setxattr implementation and remove the unused inode argument. Move ovl_other_xattr_set above ovl_own_xattr_set so that they match the order of handlers in ovl_xattr_handlers. Signed-off-by: Andreas Gruenbacher Fixes: d837a49bd57f ("ovl: fix POSIX ACL setting") Signed-off-by: Miklos Szeredi diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index f485dd4..791c6a2 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -1006,7 +1006,7 @@ const struct inode_operations ovl_dir_inode_operations = { .setxattr = generic_setxattr, .getxattr = ovl_getxattr, .listxattr = ovl_listxattr, - .removexattr = ovl_removexattr, + .removexattr = generic_removexattr, .get_acl = ovl_get_acl, .update_time = ovl_update_time, }; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index f523511..94bca71 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -198,25 +198,38 @@ bool ovl_is_private_xattr(const char *name) sizeof(OVL_XATTR_PREFIX) - 1) == 0; } -int ovl_setxattr(struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) +int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags) { int err; - struct dentry *upperdentry; + struct path realpath; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); const struct cred *old_cred; err = ovl_want_write(dentry); if (err) goto out; + if (!value && !OVL_TYPE_UPPER(type)) { + err = vfs_getxattr(realpath.dentry, name, NULL, 0); + if (err < 0) + goto out_drop_write; + } + err = ovl_copy_up(dentry); if (err) goto out_drop_write; - upperdentry = ovl_dentry_upper(dentry); + if (!OVL_TYPE_UPPER(type)) + ovl_path_upper(dentry, &realpath); + old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_setxattr(upperdentry, name, value, size, flags); + if (value) + err = vfs_setxattr(realpath.dentry, name, value, size, flags); + else { + WARN_ON(flags != XATTR_REPLACE); + err = vfs_removexattr(realpath.dentry, name); + } revert_creds(old_cred); out_drop_write: @@ -272,42 +285,6 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return res; } -int ovl_removexattr(struct dentry *dentry, const char *name) -{ - int err; - struct path realpath; - enum ovl_path_type type = ovl_path_real(dentry, &realpath); - const struct cred *old_cred; - - err = ovl_want_write(dentry); - if (err) - goto out; - - err = -ENODATA; - if (ovl_is_private_xattr(name)) - goto out_drop_write; - - if (!OVL_TYPE_UPPER(type)) { - err = vfs_getxattr(realpath.dentry, name, NULL, 0); - if (err < 0) - goto out_drop_write; - - err = ovl_copy_up(dentry); - if (err) - goto out_drop_write; - - ovl_path_upper(dentry, &realpath); - } - - old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_removexattr(realpath.dentry, name); - revert_creds(old_cred); -out_drop_write: - ovl_drop_write(dentry); -out: - return err; -} - struct posix_acl *ovl_get_acl(struct inode *inode, int type) { struct inode *realinode = ovl_inode_real(inode, NULL); @@ -393,7 +370,7 @@ static const struct inode_operations ovl_file_inode_operations = { .setxattr = generic_setxattr, .getxattr = ovl_getxattr, .listxattr = ovl_listxattr, - .removexattr = ovl_removexattr, + .removexattr = generic_removexattr, .get_acl = ovl_get_acl, .update_time = ovl_update_time, }; @@ -406,7 +383,7 @@ static const struct inode_operations ovl_symlink_inode_operations = { .setxattr = generic_setxattr, .getxattr = ovl_getxattr, .listxattr = ovl_listxattr, - .removexattr = ovl_removexattr, + .removexattr = generic_removexattr, .update_time = ovl_update_time, }; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index f50c390..5769aaf 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -185,13 +185,11 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, /* inode.c */ int ovl_setattr(struct dentry *dentry, struct iattr *attr); int ovl_permission(struct inode *inode, int mask); -int ovl_setxattr(struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags); +int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags); ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); -int ovl_removexattr(struct dentry *dentry, const char *name); struct posix_acl *ovl_get_acl(struct inode *inode, int type); int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index c356191..45a2eb0 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1018,21 +1018,13 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, posix_acl_release(acl); - return ovl_setxattr(dentry, inode, handler->name, value, size, flags); + return ovl_xattr_set(dentry, handler->name, value, size, flags); out_acl_release: posix_acl_release(acl); return err; } -static int ovl_other_xattr_set(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) -{ - return ovl_setxattr(dentry, inode, name, value, size, flags); -} - static int ovl_own_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *value, @@ -1041,6 +1033,14 @@ static int ovl_own_xattr_set(const struct xattr_handler *handler, return -EPERM; } +static int ovl_other_xattr_set(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) +{ + return ovl_xattr_set(dentry, name, value, size, flags); +} + static const struct xattr_handler __maybe_unused ovl_posix_acl_access_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_ACCESS, -- cgit v0.10.2 From ce31513a9114f74fe3e9caa6534d201bdac7238d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:12:00 +0200 Subject: ovl: copyattr after setting POSIX ACL Setting POSIX acl may also modify the file mode, so need to copy that up to the overlay inode. Reported-by: Eryu Guan Fixes: d837a49bd57f ("ovl: fix POSIX ACL setting") Signed-off-by: Miklos Szeredi diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 45a2eb0..cba2c9f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1018,7 +1018,11 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, posix_acl_release(acl); - return ovl_xattr_set(dentry, handler->name, value, size, flags); + err = ovl_xattr_set(dentry, handler->name, value, size, flags); + if (!err) + ovl_copyattr(ovl_inode_real(inode, NULL), inode); + + return err; out_acl_release: posix_acl_release(acl); -- cgit v0.10.2 From 0eb45fc3bb7a2cf9c9c93d9e95986a841e5f4625 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 22 Aug 2016 17:52:55 +0200 Subject: ovl: Switch to generic_getxattr Now that overlayfs has xattr handlers for iop->{set,remove}xattr, use those same handlers for iop->getxattr as well. Signed-off-by: Andreas Gruenbacher Signed-off-by: Miklos Szeredi diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 791c6a2..1560fdc 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -1004,7 +1004,7 @@ const struct inode_operations ovl_dir_inode_operations = { .permission = ovl_permission, .getattr = ovl_dir_getattr, .setxattr = generic_setxattr, - .getxattr = ovl_getxattr, + .getxattr = generic_getxattr, .listxattr = ovl_listxattr, .removexattr = generic_removexattr, .get_acl = ovl_get_acl, diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 94bca71..1878591 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -238,16 +238,13 @@ out: return err; } -ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, - const char *name, void *value, size_t size) +int ovl_xattr_get(struct dentry *dentry, const char *name, + void *value, size_t size) { struct dentry *realdentry = ovl_dentry_real(dentry); ssize_t res; const struct cred *old_cred; - if (ovl_is_private_xattr(name)) - return -ENODATA; - old_cred = ovl_override_creds(dentry->d_sb); res = vfs_getxattr(realdentry, name, value, size); revert_creds(old_cred); @@ -368,7 +365,7 @@ static const struct inode_operations ovl_file_inode_operations = { .permission = ovl_permission, .getattr = ovl_getattr, .setxattr = generic_setxattr, - .getxattr = ovl_getxattr, + .getxattr = generic_getxattr, .listxattr = ovl_listxattr, .removexattr = generic_removexattr, .get_acl = ovl_get_acl, @@ -381,7 +378,7 @@ static const struct inode_operations ovl_symlink_inode_operations = { .readlink = ovl_readlink, .getattr = ovl_getattr, .setxattr = generic_setxattr, - .getxattr = ovl_getxattr, + .getxattr = generic_getxattr, .listxattr = ovl_listxattr, .removexattr = generic_removexattr, .update_time = ovl_update_time, diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 5769aaf..5813ccf 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -187,8 +187,8 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr); int ovl_permission(struct inode *inode, int mask); int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, - const char *name, void *value, size_t size); +int ovl_xattr_get(struct dentry *dentry, const char *name, + void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); struct posix_acl *ovl_get_acl(struct inode *inode, int type); int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index cba2c9f..a4585f9 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -987,6 +987,14 @@ static unsigned int ovl_split_lowerdirs(char *str) } static int __maybe_unused +ovl_posix_acl_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return ovl_xattr_get(dentry, handler->name, buffer, size); +} + +static int __maybe_unused ovl_posix_acl_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *value, @@ -1029,6 +1037,13 @@ out_acl_release: return err; } +static int ovl_own_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return -EPERM; +} + static int ovl_own_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *value, @@ -1037,6 +1052,13 @@ static int ovl_own_xattr_set(const struct xattr_handler *handler, return -EPERM; } +static int ovl_other_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return ovl_xattr_get(dentry, name, buffer, size); +} + static int ovl_other_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *value, @@ -1049,6 +1071,7 @@ static const struct xattr_handler __maybe_unused ovl_posix_acl_access_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_ACCESS, .flags = ACL_TYPE_ACCESS, + .get = ovl_posix_acl_xattr_get, .set = ovl_posix_acl_xattr_set, }; @@ -1056,16 +1079,19 @@ static const struct xattr_handler __maybe_unused ovl_posix_acl_default_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_DEFAULT, .flags = ACL_TYPE_DEFAULT, + .get = ovl_posix_acl_xattr_get, .set = ovl_posix_acl_xattr_set, }; static const struct xattr_handler ovl_own_xattr_handler = { .prefix = OVL_XATTR_PREFIX, + .get = ovl_own_xattr_get, .set = ovl_own_xattr_set, }; static const struct xattr_handler ovl_other_xattr_handler = { .prefix = "", /* catch all */ + .get = ovl_other_xattr_get, .set = ovl_other_xattr_set, }; -- cgit v0.10.2 From 7cb35119d067191ce9ebc380a599db0b03cbd9d9 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:12:00 +0200 Subject: ovl: listxattr: use strnlen() Be defensive about what underlying fs provides us in the returned xattr list buffer. If it's not properly null terminated, bail out with a warning insead of BUG. Signed-off-by: Miklos Szeredi Cc: diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 1878591..c75625c 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -255,7 +255,8 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { struct dentry *realdentry = ovl_dentry_real(dentry); ssize_t res; - int off; + size_t len; + char *s; const struct cred *old_cred; old_cred = ovl_override_creds(dentry->d_sb); @@ -265,17 +266,19 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return res; /* filter out private xattrs */ - for (off = 0; off < res;) { - char *s = list + off; - size_t slen = strlen(s) + 1; + for (s = list, len = res; len;) { + size_t slen = strnlen(s, len) + 1; - BUG_ON(off + slen > res); + /* underlying fs providing us with an broken xattr list? */ + if (WARN_ON(slen > len)) + return -EIO; + len -= slen; if (ovl_is_private_xattr(s)) { res -= slen; - memmove(s, s + slen, res - off); + memmove(s, s + slen, len); } else { - off += slen; + s += slen; } } -- cgit v0.10.2 From 026e5e0cc12474495515275d9c176ef823238c70 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 1 Sep 2016 11:12:00 +0200 Subject: ovl: update doc Some of the documented quirks no longer apply. Signed-off-by: Miklos Szeredi diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index d6259c7..bcbf971 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -183,12 +183,10 @@ The copy_up operation essentially creates a new, identical file and moves it over to the old name. The new file may be on a different filesystem, so both st_dev and st_ino of the file may change. -Any open files referring to this inode will access the old data and -metadata. Similarly any file locks obtained before copy_up will not -apply to the copied up file. +Any open files referring to this inode will access the old data. -On a file opened with O_RDONLY fchmod(2), fchown(2), futimesat(2) and -fsetxattr(2) will fail with EROFS. +Any file locks (and leases) obtained before copy_up will not apply +to the copied up file. If a file with multiple hard links is copied up, then this will "break" the link. Changes will not be propagated to other names -- cgit v0.10.2 From 3dc09ec895f098cedd789a620c90ff1bf7f779a1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 24 Aug 2016 11:57:52 -0400 Subject: Btrfs: kill invalid ASSERT() in process_all_refs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suppose you have the following tree in snap1 on a file system mounted with -o inode_cache so that inode numbers are recycled └── [ 258] a └── [ 257] b and then you remove b, rename a to c, and then re-create b in c so you have the following tree └── [ 258] c └── [ 257] b and then you try to do an incremental send you will hit ASSERT(pending_move == 0); in process_all_refs(). This is because we assume that any recycling of inodes will not have a pending change in our path, which isn't the case. This is the case for the DELETE side, since we want to remove the old file using the old path, but on the create side we could have a pending move and need to do the normal pending rename dance. So remove this ASSERT() and put a comment about why we ignore pending_move. Thanks, Signed-off-by: Josef Bacik Signed-off-by: David Sterba diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index efe129f..a87675f 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4268,10 +4268,12 @@ static int process_all_refs(struct send_ctx *sctx, } btrfs_release_path(path); + /* + * We don't actually care about pending_move as we are simply + * re-creating this inode and will be rename'ing it into place once we + * rename the parent directory. + */ ret = process_recorded_refs(sctx, &pending_move); - /* Only applicable to an incremental send. */ - ASSERT(pending_move == 0); - out: btrfs_free_path(path); return ret; -- cgit v0.10.2 From a9b1fc851db054ddec703dc7951ed00620600b26 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 31 Aug 2016 16:43:33 -0700 Subject: Btrfs: fix endless loop in balancing block groups Qgroup function may overwrite the saved error 'err' with 0 in case quota is not enabled, and this ends up with a endless loop in balance because we keep going back to balance the same block group. It really should use 'ret' instead. Signed-off-by: Liu Bo Reviewed-by: Qu Wenruo Signed-off-by: David Sterba diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 8a2c2a0..c0c13dc 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4200,9 +4200,11 @@ restart: err = PTR_ERR(trans); goto out_free; } - err = qgroup_fix_relocated_data_extents(trans, rc); - if (err < 0) { - btrfs_abort_transaction(trans, err); + ret = qgroup_fix_relocated_data_extents(trans, rc); + if (ret < 0) { + btrfs_abort_transaction(trans, ret); + if (!err) + err = ret; goto out_free; } btrfs_commit_transaction(trans, rc->extent_root); -- cgit v0.10.2 From e0af24849efb0eea572cf22d22bb65d164cb8a6f Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang Date: Wed, 31 Aug 2016 19:46:16 +0800 Subject: btrfs: fix one bug that process may endlessly wait for ticket in wait_reserve_ticket() If can_overcommit() in btrfs_calc_reclaim_metadata_size() returns true, btrfs_async_reclaim_metadata_space() will not reclaim metadata space, just return directly and also forget to wake up process which are waiting for their tickets, so these processes will wait endlessly. Fstests case generic/172 with mount option "-o compress=lzo" have revealed this bug in my test machine. Here if we have tickets to handle, we must handle them first. Signed-off-by: Wang Xiaoguang Reviewed-by: Josef Bacik Signed-off-by: David Sterba diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 60d4ae7..64676a1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4901,11 +4901,6 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, u64 expected; u64 to_reclaim = 0; - to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); - if (can_overcommit(root, space_info, to_reclaim, - BTRFS_RESERVE_FLUSH_ALL)) - return 0; - list_for_each_entry(ticket, &space_info->tickets, list) to_reclaim += ticket->bytes; list_for_each_entry(ticket, &space_info->priority_tickets, list) @@ -4913,6 +4908,11 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, if (to_reclaim) return to_reclaim; + to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); + if (can_overcommit(root, space_info, to_reclaim, + BTRFS_RESERVE_FLUSH_ALL)) + return 0; + used = space_info->bytes_used + space_info->bytes_reserved + space_info->bytes_pinned + space_info->bytes_readonly + space_info->bytes_may_use; -- cgit v0.10.2 From 6b4e3181d7bd5ca5ab6f45929e4a5ffa7ab4ab7f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 1 Sep 2016 16:14:41 -0700 Subject: mm, oom: prevent premature OOM killer invocation for high order request There have been several reports about pre-mature OOM killer invocation in 4.7 kernel when order-2 allocation request (for the kernel stack) invoked OOM killer even during basic workloads (light IO or even kernel compile on some filesystems). In all reported cases the memory is fragmented and there are no order-2+ pages available. There is usually a large amount of slab memory (usually dentries/inodes) and further debugging has shown that there are way too many unmovable blocks which are skipped during the compaction. Multiple reporters have confirmed that the current linux-next which includes [1] and [2] helped and OOMs are not reproducible anymore. A simpler fix for the late rc and stable is to simply ignore the compaction feedback and retry as long as there is a reclaim progress and we are not getting OOM for order-0 pages. We already do that for CONFING_COMPACTION=n so let's reuse the same code when compaction is enabled as well. [1] http://lkml.kernel.org/r/20160810091226.6709-1-vbabka@suse.cz [2] http://lkml.kernel.org/r/f7a9ea9d-bb88-bfd6-e340-3a933559305a@suse.cz Fixes: 0a0337e0d1d1 ("mm, oom: rework oom detection") Link: http://lkml.kernel.org/r/20160823074339.GB23577@dhcp22.suse.cz Signed-off-by: Michal Hocko Tested-by: Olaf Hering Tested-by: Ralf-Peter Rohbeck Cc: Markus Trippelsdorf Cc: Arkadiusz Miskiewicz Cc: Ralf-Peter Rohbeck Cc: Jiri Slaby Cc: Vlastimil Babka Cc: Joonsoo Kim Cc: Tetsuo Handa Cc: David Rientjes Cc: [4.7.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3fbe73a..7791a03 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3137,54 +3137,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, return NULL; } -static inline bool -should_compact_retry(struct alloc_context *ac, int order, int alloc_flags, - enum compact_result compact_result, - enum compact_priority *compact_priority, - int compaction_retries) -{ - int max_retries = MAX_COMPACT_RETRIES; - - if (!order) - return false; - - /* - * compaction considers all the zone as desperately out of memory - * so it doesn't really make much sense to retry except when the - * failure could be caused by insufficient priority - */ - if (compaction_failed(compact_result)) { - if (*compact_priority > MIN_COMPACT_PRIORITY) { - (*compact_priority)--; - return true; - } - return false; - } - - /* - * make sure the compaction wasn't deferred or didn't bail out early - * due to locks contention before we declare that we should give up. - * But do not retry if the given zonelist is not suitable for - * compaction. - */ - if (compaction_withdrawn(compact_result)) - return compaction_zonelist_suitable(ac, order, alloc_flags); - - /* - * !costly requests are much more important than __GFP_REPEAT - * costly ones because they are de facto nofail and invoke OOM - * killer to move on while costly can fail and users are ready - * to cope with that. 1/4 retries is rather arbitrary but we - * would need much more detailed feedback from compaction to - * make a better decision. - */ - if (order > PAGE_ALLOC_COSTLY_ORDER) - max_retries /= 4; - if (compaction_retries <= max_retries) - return true; - - return false; -} #else static inline struct page * __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, @@ -3195,6 +3147,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, return NULL; } +#endif /* CONFIG_COMPACTION */ + static inline bool should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags, enum compact_result compact_result, @@ -3221,7 +3175,6 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla } return false; } -#endif /* CONFIG_COMPACTION */ /* Perform direct synchronous page reclaim */ static int -- cgit v0.10.2 From 070c43eea5043e950daa423707ae3c77e2f48edb Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Thu, 1 Sep 2016 16:14:44 -0700 Subject: kexec: fix double-free when failing to relocate the purgatory If kexec_apply_relocations fails, kexec_load_purgatory frees pi->sechdrs and pi->purgatory_buf. This is redundant, because in case of error kimage_file_prepare_segments calls kimage_file_post_load_cleanup, which will also free those buffers. This causes two warnings like the following, one for pi->sechdrs and the other for pi->purgatory_buf: kexec-bzImage64: Loading purgatory failed ------------[ cut here ]------------ WARNING: CPU: 1 PID: 2119 at mm/vmalloc.c:1490 __vunmap+0xc1/0xd0 Trying to vfree() nonexistent vm area (ffffc90000e91000) Modules linked in: CPU: 1 PID: 2119 Comm: kexec Not tainted 4.8.0-rc3+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x4d/0x65 __warn+0xcb/0xf0 warn_slowpath_fmt+0x4f/0x60 ? find_vmap_area+0x19/0x70 ? kimage_file_post_load_cleanup+0x47/0xb0 __vunmap+0xc1/0xd0 vfree+0x2e/0x70 kimage_file_post_load_cleanup+0x5e/0xb0 SyS_kexec_file_load+0x448/0x680 ? putname+0x54/0x60 ? do_sys_open+0x190/0x1f0 entry_SYSCALL_64_fastpath+0x13/0x8f ---[ end trace 158bb74f5950ca2b ]--- Fix by setting pi->sechdrs an pi->purgatory_buf to NULL, since vfree won't try to free a NULL pointer. Link: http://lkml.kernel.org/r/1472083546-23683-1-git-send-email-bauerman@linux.vnet.ibm.com Signed-off-by: Thiago Jung Bauermann Acked-by: Baoquan He Cc: "Eric W. Biederman" Cc: Vivek Goyal Cc: Dave Young Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 503bc2d..037c321 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -887,7 +887,10 @@ int kexec_load_purgatory(struct kimage *image, unsigned long min, return 0; out: vfree(pi->sechdrs); + pi->sechdrs = NULL; + vfree(pi->purgatory_buf); + pi->purgatory_buf = NULL; return ret; } -- cgit v0.10.2 From 236dec051078a8691950f56949612b4b74107e48 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 1 Sep 2016 16:14:47 -0700 Subject: kconfig: tinyconfig: provide whole choice blocks to avoid warnings Using "make tinyconfig" produces a couple of annoying warnings that show up for build test machines all the time: .config:966:warning: override: NOHIGHMEM changes choice state .config:965:warning: override: SLOB changes choice state .config:963:warning: override: KERNEL_XZ changes choice state .config:962:warning: override: CC_OPTIMIZE_FOR_SIZE changes choice state .config:933:warning: override: SLOB changes choice state .config:930:warning: override: CC_OPTIMIZE_FOR_SIZE changes choice state .config:870:warning: override: SLOB changes choice state .config:868:warning: override: KERNEL_XZ changes choice state .config:867:warning: override: CC_OPTIMIZE_FOR_SIZE changes choice state I've made a previous attempt at fixing them and we discussed a number of alternatives. I tried changing the Makefile to use "merge_config.sh -n $(fragment-list)" but couldn't get that to work properly. This is yet another approach, based on the observation that we do want to see a warning for conflicting 'choice' options, and that we can simply make them non-conflicting by listing all other options as disabled. This is a trivial patch that we can apply independent of plans for other changes. Link: http://lkml.kernel.org/r/20160829214952.1334674-2-arnd@arndb.de Link: https://storage.kernelci.org/mainline/v4.7-rc6/x86-tinyconfig/build.log https://patchwork.kernel.org/patch/9212749/ Signed-off-by: Arnd Bergmann Reviewed-by: Josh Triplett Reviewed-by: Masahiro Yamada Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config index 4e2ecfa..4b429df 100644 --- a/arch/x86/configs/tiny.config +++ b/arch/x86/configs/tiny.config @@ -1 +1,3 @@ CONFIG_NOHIGHMEM=y +# CONFIG_HIGHMEM4G is not set +# CONFIG_HIGHMEM64G is not set diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config index c2de56a..7fa0c4a 100644 --- a/kernel/configs/tiny.config +++ b/kernel/configs/tiny.config @@ -1,4 +1,12 @@ +# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y +# CONFIG_KERNEL_GZIP is not set +# CONFIG_KERNEL_BZIP2 is not set +# CONFIG_KERNEL_LZMA is not set CONFIG_KERNEL_XZ=y +# CONFIG_KERNEL_LZO is not set +# CONFIG_KERNEL_LZ4 is not set CONFIG_OPTIMIZE_INLINING=y +# CONFIG_SLAB is not set +# CONFIG_SLUB is not set CONFIG_SLOB=y -- cgit v0.10.2 From ed76b7a131f41c91b0c725d472f9b969d75ce888 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 1 Sep 2016 16:14:50 -0700 Subject: lib/test_hash.c: fix warning in two-dimensional array init lib/test_hash.c: In function 'test_hash_init': lib/test_hash.c:146:2: warning: missing braces around initializer [-Wmissing-braces] Fixes: 468a9428521e7d00 (": Add support for architecture-specific functions") Link: http://lkml.kernel.org/r/20160829214952.1334674-3-arnd@arndb.de Signed-off-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann Acked-by: George Spelvin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/test_hash.c b/lib/test_hash.c index 66c5fc8..81702ee 100644 --- a/lib/test_hash.c +++ b/lib/test_hash.c @@ -143,7 +143,7 @@ static int __init test_hash_init(void) { char buf[SIZE+1]; - u32 string_or = 0, hash_or[2][33] = { 0 }; + u32 string_or = 0, hash_or[2][33] = { { 0, } }; unsigned tests = 0; unsigned long long h64 = 0; int i, j; -- cgit v0.10.2 From e6173ba42bbdba05fd4f3021c0beda0506271507 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 1 Sep 2016 16:14:53 -0700 Subject: lib/test_hash.c: fix warning in preprocessor symbol evaluation Some versions of gcc don't like tests for the value of an undefined preprocessor symbol, even in the #else branch of an #ifndef: lib/test_hash.c:224:7: warning: "HAVE_ARCH__HASH_32" is not defined [-Wundef] #elif HAVE_ARCH__HASH_32 != 1 ^ lib/test_hash.c:229:7: warning: "HAVE_ARCH_HASH_32" is not defined [-Wundef] #elif HAVE_ARCH_HASH_32 != 1 ^ lib/test_hash.c:234:7: warning: "HAVE_ARCH_HASH_64" is not defined [-Wundef] #elif HAVE_ARCH_HASH_64 != 1 ^ Seen with gcc 4.9, not seen with 4.1.2. Change the logic to only check the value inside an #ifdef to fix this. Fixes: 468a9428521e7d00 (": Add support for architecture-specific functions") Link: http://lkml.kernel.org/r/20160829214952.1334674-4-arnd@arndb.de Signed-off-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann Acked-by: George Spelvin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/test_hash.c b/lib/test_hash.c index 81702ee..cac20c5 100644 --- a/lib/test_hash.c +++ b/lib/test_hash.c @@ -219,21 +219,27 @@ test_hash_init(void) } /* Issue notices about skipped tests. */ -#ifndef HAVE_ARCH__HASH_32 - pr_info("__hash_32() has no arch implementation to test."); -#elif HAVE_ARCH__HASH_32 != 1 +#ifdef HAVE_ARCH__HASH_32 +#if HAVE_ARCH__HASH_32 != 1 pr_info("__hash_32() is arch-specific; not compared to generic."); #endif -#ifndef HAVE_ARCH_HASH_32 - pr_info("hash_32() has no arch implementation to test."); -#elif HAVE_ARCH_HASH_32 != 1 +#else + pr_info("__hash_32() has no arch implementation to test."); +#endif +#ifdef HAVE_ARCH_HASH_32 +#if HAVE_ARCH_HASH_32 != 1 pr_info("hash_32() is arch-specific; not compared to generic."); #endif -#ifndef HAVE_ARCH_HASH_64 - pr_info("hash_64() has no arch implementation to test."); -#elif HAVE_ARCH_HASH_64 != 1 +#else + pr_info("hash_32() has no arch implementation to test."); +#endif +#ifdef HAVE_ARCH_HASH_64 +#if HAVE_ARCH_HASH_64 != 1 pr_info("hash_64() is arch-specific; not compared to generic."); #endif +#else + pr_info("hash_64() has no arch implementation to test."); +#endif pr_notice("%u tests passed.", tests); -- cgit v0.10.2 From 6aa303defb7454a2520c4ddcdf6b081f62a15890 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 1 Sep 2016 16:14:55 -0700 Subject: mm, vmscan: only allocate and reclaim from zones with pages managed by the buddy allocator Firmware Assisted Dump (FA_DUMP) on ppc64 reserves substantial amounts of memory when booting a secondary kernel. Srikar Dronamraju reported that multiple nodes may have no memory managed by the buddy allocator but still return true for populated_zone(). Commit 1d82de618ddd ("mm, vmscan: make kswapd reclaim in terms of nodes") was reported to cause kswapd to spin at 100% CPU usage when fadump was enabled. The old code happened to deal with the situation of a populated node with zero free pages by co-incidence but the current code tries to reclaim populated zones without realising that is impossible. We cannot just convert populated_zone() as many existing users really need to check for present_pages. This patch introduces a managed_zone() helper and uses it in the few cases where it is critical that the check is made for managed pages -- zonelist construction and page reclaim. Link: http://lkml.kernel.org/r/20160831195104.GB8119@techsingularity.net Signed-off-by: Mel Gorman Reported-by: Srikar Dronamraju Tested-by: Srikar Dronamraju Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d572b78..7f2ae99 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -828,9 +828,21 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); */ #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) -static inline int populated_zone(struct zone *zone) +/* + * Returns true if a zone has pages managed by the buddy allocator. + * All the reclaim decisions have to use this function rather than + * populated_zone(). If the whole zone is reserved then we can easily + * end up with populated_zone() && !managed_zone(). + */ +static inline bool managed_zone(struct zone *zone) +{ + return zone->managed_pages; +} + +/* Returns true if a zone has memory */ +static inline bool populated_zone(struct zone *zone) { - return (!!zone->present_pages); + return zone->present_pages; } extern int movable_zone; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7791a03..a2214c6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4360,7 +4360,7 @@ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, do { zone_type--; zone = pgdat->node_zones + zone_type; - if (populated_zone(zone)) { + if (managed_zone(zone)) { zoneref_set_zone(zone, &zonelist->_zonerefs[nr_zones++]); check_highest_zone(zone_type); @@ -4598,7 +4598,7 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes) for (j = 0; j < nr_nodes; j++) { node = node_order[j]; z = &NODE_DATA(node)->node_zones[zone_type]; - if (populated_zone(z)) { + if (managed_zone(z)) { zoneref_set_zone(z, &zonelist->_zonerefs[pos++]); check_highest_zone(zone_type); diff --git a/mm/vmscan.c b/mm/vmscan.c index 374d95d..b1e12a1 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1665,7 +1665,7 @@ static bool inactive_reclaimable_pages(struct lruvec *lruvec, for (zid = sc->reclaim_idx; zid >= 0; zid--) { zone = &pgdat->node_zones[zid]; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; if (zone_page_state_snapshot(zone, NR_ZONE_LRU_BASE + @@ -2036,7 +2036,7 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, struct zone *zone = &pgdat->node_zones[zid]; unsigned long inactive_zone, active_zone; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; inactive_zone = zone_page_state(zone, @@ -2171,7 +2171,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, for (z = 0; z < MAX_NR_ZONES; z++) { struct zone *zone = &pgdat->node_zones[z]; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; total_high_wmark += high_wmark_pages(zone); @@ -2510,7 +2510,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, /* If compaction would go ahead or the allocation would succeed, stop */ for (z = 0; z <= sc->reclaim_idx; z++) { struct zone *zone = &pgdat->node_zones[z]; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) { @@ -2840,7 +2840,7 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) for (i = 0; i <= ZONE_NORMAL; i++) { zone = &pgdat->node_zones[i]; - if (!populated_zone(zone) || + if (!managed_zone(zone) || pgdat_reclaimable_pages(pgdat) == 0) continue; @@ -3141,7 +3141,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx) for (i = 0; i <= classzone_idx; i++) { struct zone *zone = pgdat->node_zones + i; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; if (!zone_balanced(zone, order, classzone_idx)) @@ -3169,7 +3169,7 @@ static bool kswapd_shrink_node(pg_data_t *pgdat, sc->nr_to_reclaim = 0; for (z = 0; z <= sc->reclaim_idx; z++) { zone = pgdat->node_zones + z; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX); @@ -3242,7 +3242,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) if (buffer_heads_over_limit) { for (i = MAX_NR_ZONES - 1; i >= 0; i--) { zone = pgdat->node_zones + i; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; sc.reclaim_idx = i; @@ -3262,7 +3262,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) */ for (i = classzone_idx; i >= 0; i--) { zone = pgdat->node_zones + i; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; if (zone_balanced(zone, sc.order, classzone_idx)) @@ -3508,7 +3508,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) pg_data_t *pgdat; int z; - if (!populated_zone(zone)) + if (!managed_zone(zone)) return; if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL)) @@ -3522,7 +3522,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) /* Only wake kswapd if all zones are unbalanced */ for (z = 0; z <= classzone_idx; z++) { zone = pgdat->node_zones + z; - if (!populated_zone(zone)) + if (!managed_zone(zone)) continue; if (zone_balanced(zone, order, classzone_idx)) -- cgit v0.10.2 From 8a793bea2745d9876030f892a84fc8be180f2e78 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 1 Sep 2016 16:14:58 -0700 Subject: drivers/scsi/wd719x.c: remove last declaration using DEFINE_PCI_DEVICE_TABLE Convert it to the preferred const struct pci_device_id instead. Link: http://lkml.kernel.org/r/95c5e4100c3cd4eda643624f5b70e8d7abceb86c.1472660229.git.joe@perches.com Signed-off-by: Joe Perches Reviewed-by: Bart Van Assche Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/scsi/wd719x.c b/drivers/scsi/wd719x.c index e3da1a2..2a9da2e 100644 --- a/drivers/scsi/wd719x.c +++ b/drivers/scsi/wd719x.c @@ -962,7 +962,7 @@ static void wd719x_pci_remove(struct pci_dev *pdev) scsi_host_put(sh); } -static DEFINE_PCI_DEVICE_TABLE(wd719x_pci_table) = { +static const struct pci_device_id wd719x_pci_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_WD, 0x3296) }, {} }; -- cgit v0.10.2 From 7e932159901183283cd82d797bc9a7c681e48e9c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 1 Sep 2016 16:15:01 -0700 Subject: treewide: remove references to the now unnecessary DEFINE_PCI_DEVICE_TABLE It's been eliminated from the sources, remove it from everywhere else. Link: http://lkml.kernel.org/r/076eff466fd7edb550c25c8b25d76924ca0eba62.1472660229.git.joe@perches.com Signed-off-by: Joe Perches Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: Bjorn Helgaas Cc: Andy Whitcroft Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt index 123881f..77f49dc 100644 --- a/Documentation/PCI/pci.txt +++ b/Documentation/PCI/pci.txt @@ -124,7 +124,6 @@ initialization with a pointer to a structure describing the driver The ID table is an array of struct pci_device_id entries ending with an all-zero entry. Definitions with static const are generally preferred. -Use of the deprecated macro DEFINE_PCI_DEVICE_TABLE should be avoided. Each entry consists of: diff --git a/include/linux/pci.h b/include/linux/pci.h index fbc1fa6..0ab8359 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -683,15 +683,6 @@ struct pci_driver { #define to_pci_driver(drv) container_of(drv, struct pci_driver, driver) /** - * DEFINE_PCI_DEVICE_TABLE - macro used to describe a pci device table - * @_table: device table name - * - * This macro is deprecated and should not be used in new code. - */ -#define DEFINE_PCI_DEVICE_TABLE(_table) \ - const struct pci_device_id _table[] - -/** * PCI_DEVICE - macro used to describe a specific pci device * @vend: the 16 bit PCI Vendor ID * @dev: the 16 bit PCI Device ID diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 4de3cc4..206a6b3 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3570,15 +3570,6 @@ sub process { } } -# check for uses of DEFINE_PCI_DEVICE_TABLE - if ($line =~ /\bDEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=/) { - if (WARN("DEFINE_PCI_DEVICE_TABLE", - "Prefer struct pci_device_id over deprecated DEFINE_PCI_DEVICE_TABLE\n" . $herecurr) && - $fix) { - $fixed[$fixlinenr] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /; - } - } - # check for new typedefs, only function parameters and sparse annotations # make sense. if ($line =~ /\btypedef\s/ && diff --git a/scripts/tags.sh b/scripts/tags.sh index ed7eef2..b3775a9 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -206,7 +206,6 @@ regex_c=( '/\ Date: Thu, 1 Sep 2016 16:15:04 -0700 Subject: printk/nmi: avoid direct printk()-s from __printk_nmi_flush() __printk_nmi_flush() can be called from nmi_panic(), therefore it has to test whether it's executed in NMI context and thus must route the messages through deferred printk() or via direct printk(). This is to avoid potential deadlocks, as described in commit cf9b1106c81c ("printk/nmi: flush NMI messages on the system panic"). However there remain two places where __printk_nmi_flush() does unconditional direct printk() calls: - pr_err("printk_nmi_flush: internal error ...") - pr_cont("\n") Factor out print_nmi_seq_line() parts into a new printk_nmi_flush_line() function, which takes care of in_nmi(), and use it in __printk_nmi_flush() for printing and error-reporting. Link: http://lkml.kernel.org/r/20160830161354.581-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Cc: Petr Mladek Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c index b69eb8a..16bab47 100644 --- a/kernel/printk/nmi.c +++ b/kernel/printk/nmi.c @@ -99,27 +99,33 @@ again: return add; } -/* - * printk one line from the temporary buffer from @start index until - * and including the @end index. - */ -static void print_nmi_seq_line(struct nmi_seq_buf *s, int start, int end) +static void printk_nmi_flush_line(const char *text, int len) { - const char *buf = s->buffer + start; - /* * The buffers are flushed in NMI only on panic. The messages must * go only into the ring buffer at this stage. Consoles will get * explicitly called later when a crashdump is not generated. */ if (in_nmi()) - printk_deferred("%.*s", (end - start) + 1, buf); + printk_deferred("%.*s", len, text); else - printk("%.*s", (end - start) + 1, buf); + printk("%.*s", len, text); } /* + * printk one line from the temporary buffer from @start index until + * and including the @end index. + */ +static void printk_nmi_flush_seq_line(struct nmi_seq_buf *s, + int start, int end) +{ + const char *buf = s->buffer + start; + + printk_nmi_flush_line(buf, (end - start) + 1); +} + +/* * Flush data from the associated per_CPU buffer. The function * can be called either via IRQ work or independently. */ @@ -150,9 +156,11 @@ more: * the buffer an unexpected way. If we printed something then * @len must only increase. */ - if (i && i >= len) - pr_err("printk_nmi_flush: internal error: i=%d >= len=%zu\n", - i, len); + if (i && i >= len) { + const char *msg = "printk_nmi_flush: internal error\n"; + + printk_nmi_flush_line(msg, strlen(msg)); + } if (!len) goto out; /* Someone else has already flushed the buffer. */ @@ -166,14 +174,14 @@ more: /* Print line by line. */ for (; i < size; i++) { if (s->buffer[i] == '\n') { - print_nmi_seq_line(s, last_i, i); + printk_nmi_flush_seq_line(s, last_i, i); last_i = i + 1; } } /* Check if there was a partial line. */ if (last_i < size) { - print_nmi_seq_line(s, last_i, size - 1); - pr_cont("\n"); + printk_nmi_flush_seq_line(s, last_i, size - 1); + printk_nmi_flush_line("\n", strlen("\n")); } /* -- cgit v0.10.2 From c11600e4fed67ae4cd6a8096936afd445410e8ed Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 1 Sep 2016 16:15:07 -0700 Subject: mm, mempolicy: task->mempolicy must be NULL before dropping final reference KASAN allocates memory from the page allocator as part of kmem_cache_free(), and that can reference current->mempolicy through any number of allocation functions. It needs to be NULL'd out before the final reference is dropped to prevent a use-after-free bug: BUG: KASAN: use-after-free in alloc_pages_current+0x363/0x370 at addr ffff88010b48102c CPU: 0 PID: 15425 Comm: trinity-c2 Not tainted 4.8.0-rc2+ #140 ... Call Trace: dump_stack kasan_object_err kasan_report_error __asan_report_load2_noabort alloc_pages_current <-- use after free depot_save_stack save_stack kasan_slab_free kmem_cache_free __mpol_put <-- free do_exit This patch sets current->mempolicy to NULL before dropping the final reference. Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1608301442180.63329@chino.kir.corp.google.com Fixes: cd11016e5f52 ("mm, kasan: stackdepot implementation. Enable stackdepot for SLAB") Signed-off-by: David Rientjes Reported-by: Vegard Nossum Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: [4.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 4429d25..5e5b296 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -195,6 +195,7 @@ static inline bool vma_migratable(struct vm_area_struct *vma) } extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); +extern void mpol_put_task_policy(struct task_struct *); #else @@ -297,5 +298,8 @@ static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma, return -1; /* no node preference */ } +static inline void mpol_put_task_policy(struct task_struct *task) +{ +} #endif /* CONFIG_NUMA */ #endif diff --git a/kernel/exit.c b/kernel/exit.c index 2f974ae..091a78b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -848,12 +848,7 @@ void do_exit(long code) TASKS_RCU(preempt_enable()); exit_notify(tsk, group_dead); proc_exit_connector(tsk); -#ifdef CONFIG_NUMA - task_lock(tsk); - mpol_put(tsk->mempolicy); - tsk->mempolicy = NULL; - task_unlock(tsk); -#endif + mpol_put_task_policy(tsk); #ifdef CONFIG_FUTEX if (unlikely(current->pi_state_cache)) kfree(current->pi_state_cache); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d8c4e38..2da72a5 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2336,6 +2336,23 @@ out: return ret; } +/* + * Drop the (possibly final) reference to task->mempolicy. It needs to be + * dropped after task->mempolicy is set to NULL so that any allocation done as + * part of its kmem_cache_free(), such as by KASAN, doesn't reference a freed + * policy. + */ +void mpol_put_task_policy(struct task_struct *task) +{ + struct mempolicy *pol; + + task_lock(task); + pol = task->mempolicy; + task->mempolicy = NULL; + task_unlock(task); + mpol_put(pol); +} + static void sp_delete(struct shared_policy *sp, struct sp_node *n) { pr_debug("deleting %lx-l%lx\n", n->start, n->end); -- cgit v0.10.2 From c4e297386bd1621b83f6f7d58a729fb770597a91 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 1 Sep 2016 16:15:09 -0700 Subject: MAINTAINERS: Vladimir has moved vdavydov@{parallels,virtuozzo}.com will bounce from now on. Link: http://lkml.kernel.org/r/20160831180752.GB10353@esperanza Signed-off-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/.mailmap b/.mailmap index 2a91c14..093a8ac 100644 --- a/.mailmap +++ b/.mailmap @@ -158,6 +158,8 @@ Valdis Kletnieks Viresh Kumar Viresh Kumar Viresh Kumar +Vladimir Davydov +Vladimir Davydov Takashi YOSHII Yusuke Goda Gustavo Padovan diff --git a/MAINTAINERS b/MAINTAINERS index 71aa5da..d44be8a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3247,7 +3247,7 @@ F: kernel/cpuset.c CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG) M: Johannes Weiner M: Michal Hocko -M: Vladimir Davydov +M: Vladimir Davydov L: cgroups@vger.kernel.org L: linux-mm@kvack.org S: Maintained -- cgit v0.10.2 From 735f2770a770156100f534646158cb58cb8b2939 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 1 Sep 2016 16:15:13 -0700 Subject: kernel/fork: fix CLONE_CHILD_CLEARTID regression in nscd Commit fec1d0115240 ("[PATCH] Disable CLONE_CHILD_CLEARTID for abnormal exit") has caused a subtle regression in nscd which uses CLONE_CHILD_CLEARTID to clear the nscd_certainly_running flag in the shared databases, so that the clients are notified when nscd is restarted. Now, when nscd uses a non-persistent database, clients that have it mapped keep thinking the database is being updated by nscd, when in fact nscd has created a new (anonymous) one (for non-persistent databases it uses an unlinked file as backend). The original proposal for the CLONE_CHILD_CLEARTID change claimed (https://lkml.org/lkml/2006/10/25/233): : The NPTL library uses the CLONE_CHILD_CLEARTID flag on clone() syscalls : on behalf of pthread_create() library calls. This feature is used to : request that the kernel clear the thread-id in user space (at an address : provided in the syscall) when the thread disassociates itself from the : address space, which is done in mm_release(). : : Unfortunately, when a multi-threaded process incurs a core dump (such as : from a SIGSEGV), the core-dumping thread sends SIGKILL signals to all of : the other threads, which then proceed to clear their user-space tids : before synchronizing in exit_mm() with the start of core dumping. This : misrepresents the state of process's address space at the time of the : SIGSEGV and makes it more difficult for someone to debug NPTL and glibc : problems (misleading him/her to conclude that the threads had gone away : before the fault). : : The fix below is to simply avoid the CLONE_CHILD_CLEARTID action if a : core dump has been initiated. The resulting patch from Roland (https://lkml.org/lkml/2006/10/26/269) seems to have a larger scope than the original patch asked for. It seems that limitting the scope of the check to core dumping should work for SIGSEGV issue describe above. [Changelog partly based on Andreas' description] Fixes: fec1d0115240 ("[PATCH] Disable CLONE_CHILD_CLEARTID for abnormal exit") Link: http://lkml.kernel.org/r/1471968749-26173-1-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Tested-by: William Preston Acked-by: Oleg Nesterov Cc: Roland McGrath Cc: Andreas Schwab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/fork.c b/kernel/fork.c index aaf7823..93bdba1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -913,14 +913,12 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) deactivate_mm(tsk, mm); /* - * If we're exiting normally, clear a user-space tid field if - * requested. We leave this alone when dying by signal, to leave - * the value intact in a core dump, and to save the unnecessary - * trouble, say, a killed vfork parent shouldn't touch this mm. - * Userland only wants this done for a sys_exit. + * Signal userspace if we're not exiting with a core dump + * because we want to leave the value intact for debugging + * purposes. */ if (tsk->clear_child_tid) { - if (!(tsk->flags & PF_SIGNALED) && + if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) && atomic_read(&mm->mm_users) > 1) { /* * We don't check the error code - if userspace has -- cgit v0.10.2 From 1e1011af7a5725141aa4c8d30132acd93fe3da4e Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Thu, 1 Sep 2016 16:15:15 -0700 Subject: rapidio/documentation/mport_cdev: add missing parameter description Add missing description for rio_mport_cdev driver parameter 'dma_timeout'. This patch is applicable to kernel versions starting from v4.6. Link: http://lkml.kernel.org/r/20160901173104.2928-1-alexandre.bounine@idt.com Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Andre van Herk Cc: Barry Wood Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Documentation/rapidio/mport_cdev.txt b/Documentation/rapidio/mport_cdev.txt index 6e491a6..a53f786 100644 --- a/Documentation/rapidio/mport_cdev.txt +++ b/Documentation/rapidio/mport_cdev.txt @@ -80,6 +80,10 @@ functionality of their platform when planning to use this driver: III. Module parameters +- 'dma_timeout' - DMA transfer completion timeout (in msec, default value 3000). + This parameter set a maximum completion wait time for SYNC mode DMA + transfer requests and for RIO_WAIT_FOR_ASYNC ioctl requests. + - 'dbg_level' - This parameter allows to control amount of debug information generated by this device driver. This parameter is formed by set of bit masks that correspond to the specific functional blocks. -- cgit v0.10.2 From b30069291dc7f9b9a073c33d619818fe4a8e50de Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Thu, 1 Sep 2016 16:15:18 -0700 Subject: rapidio/tsi721: fix incorrect detection of address translation condition Fix incorrect condition to identify involvment of a address translation mechanism. This bug results in NULL pointer kernel crash dump in cases when mapping of inbound RapidIO address range is requested within existing aprture. Link: http://lkml.kernel.org/r/20160901173144.2983-1-alexandre.bounine@idt.com Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Andre van Herk Cc: Barry Wood Cc: [4.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index 32f0f01..9d19b9a 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -1161,7 +1161,7 @@ static int tsi721_rio_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, } else if (ibw_start < (ib_win->rstart + ib_win->size) && (ibw_start + ibw_size) > ib_win->rstart) { /* Return error if address translation involved */ - if (direct && ib_win->xlat) { + if (!direct || ib_win->xlat) { ret = -EFAULT; break; } -- cgit v0.10.2 From 08d072599234c959b0b82b63fa252c129225a899 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 2 Sep 2016 14:38:23 +0800 Subject: tick/nohz: Fix softlockup on scheduler stalls in kvm guest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tick_nohz_start_idle() is prevented to be called if the idle tick can't be stopped since commit 1f3b0f8243cb934 ("tick/nohz: Optimize nohz idle enter"). As a result, after suspend/resume the host machine, full dynticks kvm guest will softlockup: NMI watchdog: BUG: soft lockup - CPU#0 stuck for 26s! [swapper/0:0] Call Trace: default_idle+0x31/0x1a0 arch_cpu_idle+0xf/0x20 default_idle_call+0x2a/0x50 cpu_startup_entry+0x39b/0x4d0 rest_init+0x138/0x140 ? rest_init+0x5/0x140 start_kernel+0x4c1/0x4ce ? set_init_arg+0x55/0x55 ? early_idt_handler_array+0x120/0x120 x86_64_start_reservations+0x24/0x26 x86_64_start_kernel+0x142/0x14f In addition, cat /proc/stat | grep cpu in guest or host: cpu 398 16 5049 15754 5490 0 1 46 0 0 cpu0 206 5 450 0 0 0 1 14 0 0 cpu1 81 0 3937 3149 1514 0 0 9 0 0 cpu2 45 6 332 6052 2243 0 0 11 0 0 cpu3 65 2 328 6552 1732 0 0 11 0 0 The idle and iowait states are weird 0 for cpu0(housekeeping). The bug is present in both guest and host kernels, and they both have cpu0's idle and iowait states issue, however, host kernel's suspend/resume path etc will touch watchdog to avoid the softlockup. - The watchdog will not be touched in tick_nohz_stop_idle path (need be touched since the scheduler stall is expected) if idle_active flags are not detected. - The idle and iowait states will not be accounted when exit idle loop (resched or interrupt) if idle start time and idle_active flags are not set. This patch fixes it by reverting commit 1f3b0f8243cb934 since can't stop idle tick doesn't mean can't be idle. Fixes: 1f3b0f8243cb934 ("tick/nohz: Optimize nohz idle enter") Signed-off-by: Wanpeng Li Cc: Sanjeev Yadav Cc: Gaurav Jindal Cc: stable@vger.kernel.org Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Peter Zijlstra Cc: Paolo Bonzini Link: http://lkml.kernel.org/r/1472798303-4154-1-git-send-email-wanpeng.li@hotmail.com Signed-off-by: Thomas Gleixner diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 204fdc8..2ec7c00 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -908,10 +908,11 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts) ktime_t now, expires; int cpu = smp_processor_id(); + now = tick_nohz_start_idle(ts); + if (can_stop_idle_tick(cpu, ts)) { int was_stopped = ts->tick_stopped; - now = tick_nohz_start_idle(ts); ts->idle_calls++; expires = tick_nohz_stop_sched_tick(ts, now, cpu); -- cgit v0.10.2 From 753246840d012ae34ea80a1d40bc1546c62fb957 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 27 Aug 2016 16:19:49 +0000 Subject: drivers/perf: arm_pmu: Fix leak in error path In case of a IRQ type mismatch in of_pmu_irq_cfg() the device node for interrupt affinity isn't freed. So fix this issue by calling of_node_put(). Signed-off-by: Stefan Wahren Fixes: fa8ad7889d83 ("arm: perf: factor arm_pmu core out to drivers") Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index c494613..1b48bf0 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -925,6 +925,7 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) if (i > 0 && spi != using_spi) { pr_err("PPI/SPI IRQ type mismatch for %s!\n", dn->name); + of_node_put(dn); kfree(irqs); return -EINVAL; } -- cgit v0.10.2 From 63fb0a9516b2c4e23293d7253c14c40aa9c2b7d1 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 27 Aug 2016 16:19:50 +0000 Subject: drivers/perf: arm_pmu: Fix NULL pointer dereference during probe Patch 7f1d642fbb5c ("drivers/perf: arm-pmu: Fix handling of SPI lacking interrupt-affinity property") unintended also fixes perf_event support for bcm2835 which doesn't have PMU interrupts. Unfortunately this change introduce a NULL pointer dereference on bcm2835, because irq_is_percpu always expected to be called with a valid IRQ. So fix this regression by validating the IRQ before. Tested-by: Kevin Hilman Signed-off-by: Stefan Wahren Fixes: 7f1d642fbb5c ("drivers/perf: arm-pmu: Fix handling of SPI lacking "interrupt-affinity" property") Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 1b48bf0..f5e1008 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -970,7 +970,7 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) if (cpumask_weight(&pmu->supported_cpus) == 0) { int irq = platform_get_irq(pdev, 0); - if (irq_is_percpu(irq)) { + if (irq >= 0 && irq_is_percpu(irq)) { /* If using PPIs, check the affinity of the partition */ int ret; -- cgit v0.10.2 From 744c6c37cc18705d19e179622f927f5b781fe9cc Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 26 Aug 2016 16:03:42 +0100 Subject: arm64: kernel: Fix unmasked debug exceptions when restoring mdscr_el1 Changes to make the resume from cpu_suspend() code behave more like secondary boot caused debug exceptions to be unmasked early by __cpu_setup(). We then go on to restore mdscr_el1 in cpu_do_resume(), potentially taking break or watch points based on uninitialised registers. Mask debug exceptions in cpu_do_resume(), which is specific to resume from cpu_suspend(). Debug exceptions will be restored to their original state by local_dbg_restore() in cpu_suspend(), which runs after hw_breakpoint_restore() has re-initialised the other registers. Reported-by: Lorenzo Pieralisi Fixes: cabe1c81ea5b ("arm64: Change cpu_resume() to enable mmu early then access sleep_sp by va") Cc: # 4.7+ Signed-off-by: James Morse Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 5bb61de..9d37e96 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -100,7 +100,16 @@ ENTRY(cpu_do_resume) msr tcr_el1, x8 msr vbar_el1, x9 + + /* + * __cpu_setup() cleared MDSCR_EL1.MDE and friends, before unmasking + * debug exceptions. By restoring MDSCR_EL1 here, we may take a debug + * exception. Mask them until local_dbg_restore() in cpu_suspend() + * resets them. + */ + disable_dbg msr mdscr_el1, x10 + msr sctlr_el1, x12 /* * Restore oslsr_el1 by writing oslar_el1 -- cgit v0.10.2 From 15301a570754c7af60335d094dd2d1808b0641a5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 May 2016 13:47:26 -0400 Subject: x86/paravirt: Do not trace _paravirt_ident_*() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Łukasz Daniluk reported that on a RHEL kernel that his machine would lock up after enabling function tracer. I asked him to bisect the functions within available_filter_functions, which he did and it came down to three: _paravirt_nop(), _paravirt_ident_32() and _paravirt_ident_64() It was found that this is only an issue when noreplace-paravirt is added to the kernel command line. This means that those functions are most likely called within critical sections of the funtion tracer, and must not be traced. In newer kenels _paravirt_nop() is defined within gcc asm(), and is no longer an issue. But both _paravirt_ident_{32,64}() causes the following splat when they are traced: mm/pgtable-generic.c:33: bad pmd ffff8800d2435150(0000000001d00054) mm/pgtable-generic.c:33: bad pmd ffff8800d3624190(0000000001d00070) mm/pgtable-generic.c:33: bad pmd ffff8800d36a5110(0000000001d00054) mm/pgtable-generic.c:33: bad pmd ffff880118eb1450(0000000001d00054) NMI watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [systemd-journal:469] Modules linked in: e1000e CPU: 2 PID: 469 Comm: systemd-journal Not tainted 4.6.0-rc4-test+ #513 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012 task: ffff880118f740c0 ti: ffff8800d4aec000 task.ti: ffff8800d4aec000 RIP: 0010:[] [] queued_spin_lock_slowpath+0x118/0x1a0 RSP: 0018:ffff8800d4aefb90 EFLAGS: 00000246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88011eb16d40 RDX: ffffffff82485760 RSI: 000000001f288820 RDI: ffffea0000008030 RBP: ffff8800d4aefb90 R08: 00000000000c0000 R09: 0000000000000000 R10: ffffffff821c8e0e R11: 0000000000000000 R12: ffff880000200fb8 R13: 00007f7a4e3f7000 R14: ffffea000303f600 R15: ffff8800d4b562e0 FS: 00007f7a4e3d7840(0000) GS:ffff88011eb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7a4e3f7000 CR3: 00000000d3e71000 CR4: 00000000001406e0 Call Trace: _raw_spin_lock+0x27/0x30 handle_pte_fault+0x13db/0x16b0 handle_mm_fault+0x312/0x670 __do_page_fault+0x1b1/0x4e0 do_page_fault+0x22/0x30 page_fault+0x28/0x30 __vfs_read+0x28/0xe0 vfs_read+0x86/0x130 SyS_read+0x46/0xa0 entry_SYSCALL_64_fastpath+0x1e/0xa8 Code: 12 48 c1 ea 0c 83 e8 01 83 e2 30 48 98 48 81 c2 40 6d 01 00 48 03 14 c5 80 6a 5d 82 48 89 0a 8b 41 08 85 c0 75 09 f3 90 8b 41 08 <85> c0 74 f7 4c 8b 09 4d 85 c9 74 08 41 0f 18 09 eb 02 f3 90 8b Reported-by: Łukasz Daniluk Signed-off-by: Steven Rostedt Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index ad5bc95..1acfd76 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -56,12 +56,12 @@ asm (".pushsection .entry.text, \"ax\"\n" ".popsection"); /* identity function, which can be inlined */ -u32 _paravirt_ident_32(u32 x) +u32 notrace _paravirt_ident_32(u32 x) { return x; } -u64 _paravirt_ident_64(u64 x) +u64 notrace _paravirt_ident_64(u64 x) { return x; } -- cgit v0.10.2 From d1992996753132e2dafe955cccb2fb0714d3cfc4 Mon Sep 17 00:00:00 2001 From: Emanuel Czirai Date: Fri, 2 Sep 2016 07:35:50 +0200 Subject: x86/AMD: Apply erratum 665 on machines without a BIOS fix AMD F12h machines have an erratum which can cause DIV/IDIV to behave unpredictably. The workaround is to set MSRC001_1029[31] but sometimes there is no BIOS update containing that workaround so let's do it ourselves unconditionally. It is simple enough. [ Borislav: Wrote commit message. ] Signed-off-by: Emanuel Czirai Signed-off-by: Borislav Petkov Cc: Yaowu Xu Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160902053550.18097-1-bp@alien8.de Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f5c69d8..b81fe2d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -669,6 +669,17 @@ static void init_amd_gh(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); } +#define MSR_AMD64_DE_CFG 0xC0011029 + +static void init_amd_ln(struct cpuinfo_x86 *c) +{ + /* + * Apply erratum 665 fix unconditionally so machines without a BIOS + * fix work. + */ + msr_set_bit(MSR_AMD64_DE_CFG, 31); +} + static void init_amd_bd(struct cpuinfo_x86 *c) { u64 value; @@ -726,6 +737,7 @@ static void init_amd(struct cpuinfo_x86 *c) case 6: init_amd_k7(c); break; case 0xf: init_amd_k8(c); break; case 0x10: init_amd_gh(c); break; + case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; } -- cgit v0.10.2 From 3feab13c919f99b0a17d0ca22ae00cf90f5d3fd1 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 16 Aug 2016 16:59:52 +0100 Subject: ACPI / drivers: fix typo in ACPI_DECLARE_PROBE_ENTRY macro When the ACPI_DECLARE_PROBE_ENTRY macro was added in commit e647b532275b ("ACPI: Add early device probing infrastructure"), a stub macro adding an unused entry was added for the !CONFIG_ACPI Kconfig option case to make sure kernel code making use of the macro did not require to be guarded within CONFIG_ACPI in order to be compiled. The stub macro was never used since all kernel code that defines ACPI_DECLARE_PROBE_ENTRY entries is currently guarded within CONFIG_ACPI; it contains a typo that should be nonetheless fixed. Fix the typo in the stub (ie !CONFIG_ACPI) ACPI_DECLARE_PROBE_ENTRY() macro so that it can actually be used if needed. Signed-off-by: Lorenzo Pieralisi Fixes: e647b532275b (ACPI: Add early device probing infrastructure) Cc: 4.4+ # 4.4+ Signed-off-by: Rafael J. Wysocki diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4d8452c..c5eaf2f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1056,7 +1056,7 @@ static inline struct fwnode_handle *acpi_get_next_subnode(struct device *dev, return NULL; } -#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, validate, data, fn) \ +#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, valid, data, fn) \ static const void * __acpi_table_##name[] \ __attribute__((unused)) \ = { (void *) table_id, \ -- cgit v0.10.2 From 5331d9cab32ef640b4cd38a43b0858874fbb7168 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 16 Aug 2016 16:59:53 +0100 Subject: ACPI / drivers: replace acpi_probe_lock spinlock with mutex Commit e647b532275b ("ACPI: Add early device probing infrastructure") introduced code that allows inserting driver specific struct acpi_probe_entry probe entries into ACPI linker sections (one per-subsystem, eg irqchip, clocksource) that are then walked to retrieve the data and function hooks required to probe the respective kernel components. Probing for all entries in a section is triggered through the __acpi_probe_device_table() function, that in turn, according to the table ID a given probe entry reports parses the table with the function retrieved from the respective section structures (ie struct acpi_probe_entry). Owing to the current ACPI table parsing implementation, the __acpi_probe_device_table() function has to share global variables with the acpi_match_madt() function, so in order to guarantee mutual exclusion locking is required between the two functions. Current kernel code implements the locking through the acpi_probe_lock spinlock; this has the side effect of requiring all code called within the lock (ie struct acpi_probe_entry.probe_{table/subtbl} hooks) not to sleep. However, kernel subsystems that make use of the early probing infrastructure are relying on kernel APIs that may sleep (eg irq_domain_alloc_fwnode(), among others) in the function calls pointed at by struct acpi_probe_entry.{probe_table/subtbl} entries (eg gic_v2_acpi_init()), which is a bug. Since __acpi_probe_device_table() is called from context that is allowed to sleep the acpi_probe_lock spinlock can be replaced with a mutex; this fixes the issue whilst still guaranteeing mutual exclusion. Signed-off-by: Lorenzo Pieralisi Fixes: e647b532275b (ACPI: Add early device probing infrastructure) Cc: 4.4+ # 4.4+ Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index ad9fc84..e878fc7 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2054,7 +2054,7 @@ int __init acpi_scan_init(void) static struct acpi_probe_entry *ape; static int acpi_probe_count; -static DEFINE_SPINLOCK(acpi_probe_lock); +static DEFINE_MUTEX(acpi_probe_mutex); static int __init acpi_match_madt(struct acpi_subtable_header *header, const unsigned long end) @@ -2073,7 +2073,7 @@ int __init __acpi_probe_device_table(struct acpi_probe_entry *ap_head, int nr) if (acpi_disabled) return 0; - spin_lock(&acpi_probe_lock); + mutex_lock(&acpi_probe_mutex); for (ape = ap_head; nr; ape++, nr--) { if (ACPI_COMPARE_NAME(ACPI_SIG_MADT, ape->id)) { acpi_probe_count = 0; @@ -2086,7 +2086,7 @@ int __init __acpi_probe_device_table(struct acpi_probe_entry *ap_head, int nr) count++; } } - spin_unlock(&acpi_probe_lock); + mutex_unlock(&acpi_probe_mutex); return count; } -- cgit v0.10.2 From 4a29b3484f857ff350f203a2d9bf3b428c9e0bf2 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 2 Sep 2016 02:36:58 +0300 Subject: tpm: invalid self test error message The driver emits invalid self test error message even though the init succeeds. Signed-off-by: Jarkko Sakkinen Fixes: cae8b441fc20 ("tpm: Factor out common startup code") Reviewed-by: James Morris Signed-off-by: James Morris diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index 08c7e23..0c75c3f 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -957,7 +957,7 @@ int tpm2_auto_startup(struct tpm_chip *chip) goto out; rc = tpm2_do_selftest(chip); - if (rc != TPM2_RC_INITIALIZE) { + if (rc != 0 && rc != TPM2_RC_INITIALIZE) { dev_err(&chip->dev, "TPM self test failed\n"); goto out; } @@ -974,7 +974,6 @@ int tpm2_auto_startup(struct tpm_chip *chip) } } - return rc; out: if (rc > 0) rc = -ENODEV; -- cgit v0.10.2 From 3e423945ea94412283eaba8bfbe9d6e0a80b434f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 3 Sep 2016 11:02:50 -0700 Subject: devpts: return NULL pts 'priv' entry for non-devpts nodes In commit 8ead9dd54716 ("devpts: more pty driver interface cleanups") I made devpts_get_priv() just return the dentry->fs_data directly. And because I thought it wouldn't happen, I added a warning if you ever saw a pts node that wasn't on devpts. And no, that warning never triggered under any actual real use, but you can trigger it by creating nonsensical pts nodes by hand. So just revert the warning, and make devpts_get_priv() return NULL for that case like it used to. Reported-by: Dmitry Vyukov Cc: stable@vger.kernel.org # 4.6+ Cc: Eric W Biederman" Signed-off-by: Linus Torvalds diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index d116453..79a5941 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -585,7 +585,8 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv) */ void *devpts_get_priv(struct dentry *dentry) { - WARN_ON_ONCE(dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC); + if (dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC) + return NULL; return dentry->d_fsdata; } -- cgit v0.10.2 From c6935931c1894ff857616ff8549b61236a19148f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Sep 2016 14:31:46 -0700 Subject: Linux 4.8-rc5 diff --git a/Makefile b/Makefile index 67f42d5..a4e6cc5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* -- cgit v0.10.2 From 58763148758057ffc447bf990321d3ea86d199a0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 30 Aug 2016 10:15:03 +0200 Subject: perf/core: Remove WARN from perf_event_read() This effectively reverts commit: 71e7bc2bab77 ("perf/core: Check return value of the perf_event_read() IPI") ... and puts in a comment explaining why we ignore the return value. Reported-by: Vegard Nossum Signed-off-by: Peter Zijlstra (Intel) Cc: David Carrillo-Cisneros Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 71e7bc2bab77 ("perf/core: Check return value of the perf_event_read() IPI") Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 3cfabdf..07ac859 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3549,10 +3549,18 @@ static int perf_event_read(struct perf_event *event, bool group) .group = group, .ret = 0, }; - ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1); - /* The event must have been read from an online CPU: */ - WARN_ON_ONCE(ret); - ret = ret ? : data.ret; + /* + * Purposely ignore the smp_call_function_single() return + * value. + * + * If event->oncpu isn't a valid CPU it means the event got + * scheduled out and that will have updated the event count. + * + * Therefore, either way, we'll have an up-to-date event count + * after this. + */ + (void)smp_call_function_single(event->oncpu, __perf_event_read, &data, 1); + ret = data.ret; } else if (event->state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event->ctx; unsigned long flags; -- cgit v0.10.2