From 680682d4d537565e2c358483e1feeca30a8cf3d4 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 17 Jul 2016 19:55:27 +0100
Subject: cfg80211: fix missing break in NL8211_CHAN_WIDTH_80P80 case

The switch on chandef->width is missing a break on the
NL8211_CHAN_WIDTH_80P80 case; currently we get a WARN_ON when
center_freq2 is non-zero because of the missing break.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index da49c0b..bb3d64e 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -513,6 +513,7 @@ static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy,
 		r = cfg80211_get_chans_dfs_available(wiphy,
 						     chandef->center_freq2,
 						     width);
+		break;
 	default:
 		WARN_ON(chandef->center_freq2);
 		break;
-- 
cgit v0.10.2


From 4e3f21bc7bfbdc4a348852e4da1540227e1b0171 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 11 Jul 2016 15:09:15 +0200
Subject: mac80211: fix check for buffered powersave frames with txq

The logic was inverted here, set the bit if frames are pending.

Fixes: ba8c3d6f16a1 ("mac80211: add an intermediate software queue implementation")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 2e8a902..9dce3b1 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1268,7 +1268,7 @@ static void sta_ps_start(struct sta_info *sta)
 	for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
 		struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
 
-		if (!txqi->tin.backlog_packets)
+		if (txqi->tin.backlog_packets)
 			set_bit(tid, &sta->txq_buffered_tids);
 		else
 			clear_bit(tid, &sta->txq_buffered_tids);
-- 
cgit v0.10.2


From c37a54ac37e7fbf8abe2b7b886ac5f1b3e8f8d29 Mon Sep 17 00:00:00 2001
From: Maital Hahn <maitalm@ti.com>
Date: Wed, 13 Jul 2016 14:44:41 +0300
Subject: mac80211: mesh: flush stations before beacons are stopped

Some drivers (e.g. wl18xx) expect that the last stage in the
de-initialization process will be stopping the beacons, similar to AP flow.
Update ieee80211_stop_mesh() flow accordingly.
As peers can be removed dynamically, this would not impact other drivers.

Tested also on Ralink RT3572 chipset.

Signed-off-by: Maital Hahn <maitalm@ti.com>
Signed-off-by: Yaniv Machani <yanivma@ti.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index c66411d..42120d9 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -881,20 +881,22 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 
 	netif_carrier_off(sdata->dev);
 
+	/* flush STAs and mpaths on this iface */
+	sta_info_flush(sdata);
+	mesh_path_flush_by_iface(sdata);
+
 	/* stop the beacon */
 	ifmsh->mesh_id_len = 0;
 	sdata->vif.bss_conf.enable_beacon = false;
 	clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
 	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
+
+	/* remove beacon */
 	bcn = rcu_dereference_protected(ifmsh->beacon,
 					lockdep_is_held(&sdata->wdev.mtx));
 	RCU_INIT_POINTER(ifmsh->beacon, NULL);
 	kfree_rcu(bcn, rcu_head);
 
-	/* flush STAs and mpaths on this iface */
-	sta_info_flush(sdata);
-	mesh_path_flush_by_iface(sdata);
-
 	/* free all potentially still buffered group-addressed frames */
 	local->total_ps_buffered -= skb_queue_len(&ifmsh->ps.bc_buf);
 	skb_queue_purge(&ifmsh->ps.bc_buf);
-- 
cgit v0.10.2


From bce91f8a4247905b8c40a53f72c14db908cd0710 Mon Sep 17 00:00:00 2001
From: Jarno Rajahalme <jarno@ovn.org>
Date: Mon, 1 Aug 2016 19:36:07 -0700
Subject: openvswitch: Remove incorrect WARN_ONCE().

ovs_ct_find_existing() issues a warning if an existing conntrack entry
classified as IP_CT_NEW is found, with the premise that this should
not happen.  However, a newly confirmed, non-expected conntrack entry
remains IP_CT_NEW as long as no reply direction traffic is seen.  This
has resulted into somewhat confusing kernel log messages.  This patch
removes this check and warning.

Fixes: 289f2253 ("openvswitch: Find existing conntrack entry after upcall.")
Suggested-by: Joe Stringer <joe@ovn.org>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index c644c78..e054a74 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -433,7 +433,6 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 	struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_tuple_hash *h;
-	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
 	unsigned int dataoff;
 	u8 protonum;
@@ -458,13 +457,8 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 
 	ct = nf_ct_tuplehash_to_ctrack(h);
 
-	ctinfo = ovs_ct_get_info(h);
-	if (ctinfo == IP_CT_NEW) {
-		/* This should not happen. */
-		WARN_ONCE(1, "ovs_ct_find_existing: new packet for %p\n", ct);
-	}
 	skb->nfct = &ct->ct_general;
-	skb->nfctinfo = ctinfo;
+	skb->nfctinfo = ovs_ct_get_info(h);
 	return ct;
 }
 
-- 
cgit v0.10.2


From ea966cb6b9e11e87f7b146549aef8e13cad5c6ff Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 2 Aug 2016 12:23:29 +0200
Subject: net: xgene: fix maybe-uninitialized variable

Building with -Wmaybe-uninitialized shows a potential use of
an uninitialized variable:

drivers/net/ethernet/apm/xgene/xgene_enet_hw.c: In function 'xgene_enet_phy_connect':
drivers/net/ethernet/apm/xgene/xgene_enet_hw.c:802:23: warning: 'phy_dev' may be used uninitialized in this function [-Wmaybe-uninitialized]

Although the compiler correctly identified this based on the function,
the current code is still safe as long dev->of_node is non-NULL
for the case of CONFIG_ACPI=n, which is currently the case.

The warning is now disabled by default, but still appears when
building with W=1, and other build test tools should be able to
detect it as well. Adding an #else clause here makes the code
more robust and makes it clear to the compiler that this cannot
happen.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 8089a96f601b ("drivers: net: xgene: Add backward compatibility")
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index 37a0f46..18bb955 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -793,6 +793,8 @@ int xgene_enet_phy_connect(struct net_device *ndev)
 			netdev_err(ndev, "Could not connect to PHY\n");
 			return  -ENODEV;
 		}
+#else
+		return -ENODEV;
 #endif
 	}
 
-- 
cgit v0.10.2


From 1f415a74b0ca64b5bfacbb12d71ed2ec050a8cfb Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 2 Aug 2016 16:12:14 +0100
Subject: bpf: fix method of PTR_TO_PACKET reg id generation

Using per-register incrementing ID can lead to
find_good_pkt_pointers() confusing registers which
have completely different values.  Consider example:

0: (bf) r6 = r1
1: (61) r8 = *(u32 *)(r6 +76)
2: (61) r0 = *(u32 *)(r6 +80)
3: (bf) r7 = r8
4: (07) r8 += 32
5: (2d) if r8 > r0 goto pc+9
 R0=pkt_end R1=ctx R6=ctx R7=pkt(id=0,off=0,r=32) R8=pkt(id=0,off=32,r=32) R10=fp
6: (bf) r8 = r7
7: (bf) r9 = r7
8: (71) r1 = *(u8 *)(r7 +0)
9: (0f) r8 += r1
10: (71) r1 = *(u8 *)(r7 +1)
11: (0f) r9 += r1
12: (07) r8 += 32
13: (2d) if r8 > r0 goto pc+1
 R0=pkt_end R1=inv56 R6=ctx R7=pkt(id=0,off=0,r=32) R8=pkt(id=1,off=32,r=32) R9=pkt(id=1,off=0,r=32) R10=fp
14: (71) r1 = *(u8 *)(r9 +16)
15: (b7) r7 = 0
16: (bf) r0 = r7
17: (95) exit

We need to get a UNKNOWN_VALUE with imm to force id
generation so lines 0-5 make r7 a valid packet pointer.
We then read two different bytes from the packet and
add them to copies of the constructed packet pointer.
r8 (line 9) and r9 (line 11) will get the same id of 1,
independently.  When either of them is validated (line
13) - find_good_pkt_pointers() will also mark the other
as safe.  This leads to access on line 14 being mistakenly
considered safe.

Fixes: 969bf05eb3ce ("bpf: direct packet access")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f72f23b..7094c69 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -194,6 +194,7 @@ struct verifier_env {
 	struct verifier_state_list **explored_states; /* search pruning optimization */
 	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
 	u32 used_map_cnt;		/* number of used maps */
+	u32 id_gen;			/* used to generate unique reg IDs */
 	bool allow_ptr_leaks;
 };
 
@@ -1301,7 +1302,7 @@ add_imm:
 		/* dst_reg stays as pkt_ptr type and since some positive
 		 * integer value was added to the pointer, increment its 'id'
 		 */
-		dst_reg->id++;
+		dst_reg->id = ++env->id_gen;
 
 		/* something was added to pkt_ptr, set range and off to zero */
 		dst_reg->off = 0;
-- 
cgit v0.10.2


From 087d7a8c917491e6e7feb707a858d624bf5b5f14 Mon Sep 17 00:00:00 2001
From: Satish Baddipadige <satish.baddipadige@broadcom.com>
Date: Wed, 3 Aug 2016 09:43:59 +0530
Subject: tg3: Fix for diasllow rx coalescing time to be 0

When the rx coalescing time is 0, interrupts
are not generated from the controller and rx path hangs.
To avoid this rx hang, updating the driver to not allow
rx coalescing time to be 0.

Signed-off-by: Satish Baddipadige <satish.baddipadige@broadcom.com>
Signed-off-by: Siva Reddy Kallam <siva.kallam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index ff300f7..f3c6c91 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -14014,6 +14014,7 @@ static int tg3_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 	}
 
 	if ((ec->rx_coalesce_usecs > MAX_RXCOL_TICKS) ||
+	    (!ec->rx_coalesce_usecs) ||
 	    (ec->tx_coalesce_usecs > MAX_TXCOL_TICKS) ||
 	    (ec->rx_max_coalesced_frames > MAX_RXMAX_FRAMES) ||
 	    (ec->tx_max_coalesced_frames > MAX_TXMAX_FRAMES) ||
-- 
cgit v0.10.2


From 9ce6fd7a81e6f787756be2f4b85f4f7bb3658de3 Mon Sep 17 00:00:00 2001
From: Siva Reddy Kallam <siva.kallam@broadcom.com>
Date: Wed, 3 Aug 2016 09:44:00 +0530
Subject: tg3: Report the correct number of RSS queues through tg3_get_rxnfc

This patch remove the wrong substraction from info->data in
tg3_get_rxnfc function. Without this patch, the number of RSS
queues reported is less by one.

Reported-by: Michal Soltys <soltys@ziu.info>
Signed-off-by: Siva Reddy Kallam <siva.kallam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index f3c6c91..6592612 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12552,10 +12552,6 @@ static int tg3_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
 				info->data = TG3_RSS_MAX_NUM_QS;
 		}
 
-		/* The first interrupt vector only
-		 * handles link interrupts.
-		 */
-		info->data -= 1;
 		return 0;
 
 	default:
-- 
cgit v0.10.2


From 2b10d3ecf2dac737653828889ff85f614318f01a Mon Sep 17 00:00:00 2001
From: Manish Chopra <manish.chopra@qlogic.com>
Date: Wed, 3 Aug 2016 04:02:02 -0400
Subject: qlcnic: fix data structure corruption in async mbx command handling

This patch fixes a data structure corruption bug in the SRIOV VF mailbox
handler code. While handling mailbox commands from the atomic context,
driver is accessing and updating qlcnic_async_work_list_struct entry fields
in the async work list. These fields could be concurrently accessed by the
work function resulting in data corruption.

This patch restructures async mbx command handling by using a separate
async command list instead of using a list of work_struct structures.
A single work_struct is used to schedule and handle the async commands
with proper locking mechanism.

Signed-off-by: Rajesh Borundia <rajesh.borundia@qlogic.com>
Signed-off-by: Sony Chacko <sony.chacko@qlogic.com>
Signed-off-by: Manish Chopra <manish.chopra@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
index 017d8c2c..24061b9 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
@@ -156,10 +156,8 @@ struct qlcnic_vf_info {
 	spinlock_t			vlan_list_lock;	/* Lock for VLAN list */
 };
 
-struct qlcnic_async_work_list {
+struct qlcnic_async_cmd {
 	struct list_head	list;
-	struct work_struct	work;
-	void			*ptr;
 	struct qlcnic_cmd_args	*cmd;
 };
 
@@ -168,7 +166,10 @@ struct qlcnic_back_channel {
 	struct workqueue_struct *bc_trans_wq;
 	struct workqueue_struct *bc_async_wq;
 	struct workqueue_struct *bc_flr_wq;
-	struct list_head	async_list;
+	struct qlcnic_adapter	*adapter;
+	struct list_head	async_cmd_list;
+	struct work_struct	vf_async_work;
+	spinlock_t		queue_lock; /* async_cmd_list queue lock */
 };
 
 struct qlcnic_sriov {
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index 7327b72..d710705 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -29,6 +29,7 @@
 #define QLC_83XX_VF_RESET_FAIL_THRESH	8
 #define QLC_BC_CMD_MAX_RETRY_CNT	5
 
+static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work);
 static void qlcnic_sriov_vf_free_mac_list(struct qlcnic_adapter *);
 static int qlcnic_sriov_alloc_bc_mbx_args(struct qlcnic_cmd_args *, u32);
 static void qlcnic_sriov_vf_poll_dev_state(struct work_struct *);
@@ -177,7 +178,10 @@ int qlcnic_sriov_init(struct qlcnic_adapter *adapter, int num_vfs)
 	}
 
 	bc->bc_async_wq =  wq;
-	INIT_LIST_HEAD(&bc->async_list);
+	INIT_LIST_HEAD(&bc->async_cmd_list);
+	INIT_WORK(&bc->vf_async_work, qlcnic_sriov_handle_async_issue_cmd);
+	spin_lock_init(&bc->queue_lock);
+	bc->adapter = adapter;
 
 	for (i = 0; i < num_vfs; i++) {
 		vf = &sriov->vf_info[i];
@@ -1517,17 +1521,21 @@ static void qlcnic_vf_add_mc_list(struct net_device *netdev, const u8 *mac,
 
 void qlcnic_sriov_cleanup_async_list(struct qlcnic_back_channel *bc)
 {
-	struct list_head *head = &bc->async_list;
-	struct qlcnic_async_work_list *entry;
+	struct list_head *head = &bc->async_cmd_list;
+	struct qlcnic_async_cmd *entry;
 
 	flush_workqueue(bc->bc_async_wq);
+	cancel_work_sync(&bc->vf_async_work);
+
+	spin_lock(&bc->queue_lock);
 	while (!list_empty(head)) {
-		entry = list_entry(head->next, struct qlcnic_async_work_list,
+		entry = list_entry(head->next, struct qlcnic_async_cmd,
 				   list);
-		cancel_work_sync(&entry->work);
 		list_del(&entry->list);
+		kfree(entry->cmd);
 		kfree(entry);
 	}
+	spin_unlock(&bc->queue_lock);
 }
 
 void qlcnic_sriov_vf_set_multi(struct net_device *netdev)
@@ -1587,57 +1595,64 @@ void qlcnic_sriov_vf_set_multi(struct net_device *netdev)
 
 static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work)
 {
-	struct qlcnic_async_work_list *entry;
-	struct qlcnic_adapter *adapter;
+	struct qlcnic_async_cmd *entry, *tmp;
+	struct qlcnic_back_channel *bc;
 	struct qlcnic_cmd_args *cmd;
+	struct list_head *head;
+	LIST_HEAD(del_list);
+
+	bc = container_of(work, struct qlcnic_back_channel, vf_async_work);
+	head = &bc->async_cmd_list;
+
+	spin_lock(&bc->queue_lock);
+	list_splice_init(head, &del_list);
+	spin_unlock(&bc->queue_lock);
+
+	list_for_each_entry_safe(entry, tmp, &del_list, list) {
+		list_del(&entry->list);
+		cmd = entry->cmd;
+		__qlcnic_sriov_issue_cmd(bc->adapter, cmd);
+		kfree(entry);
+	}
+
+	if (!list_empty(head))
+		queue_work(bc->bc_async_wq, &bc->vf_async_work);
 
-	entry = container_of(work, struct qlcnic_async_work_list, work);
-	adapter = entry->ptr;
-	cmd = entry->cmd;
-	__qlcnic_sriov_issue_cmd(adapter, cmd);
 	return;
 }
 
-static struct qlcnic_async_work_list *
-qlcnic_sriov_get_free_node_async_work(struct qlcnic_back_channel *bc)
+static struct qlcnic_async_cmd *
+qlcnic_sriov_alloc_async_cmd(struct qlcnic_back_channel *bc,
+			     struct qlcnic_cmd_args *cmd)
 {
-	struct list_head *node;
-	struct qlcnic_async_work_list *entry = NULL;
-	u8 empty = 0;
+	struct qlcnic_async_cmd *entry = NULL;
 
-	list_for_each(node, &bc->async_list) {
-		entry = list_entry(node, struct qlcnic_async_work_list, list);
-		if (!work_pending(&entry->work)) {
-			empty = 1;
-			break;
-		}
-	}
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	if (!entry)
+		return NULL;
 
-	if (!empty) {
-		entry = kzalloc(sizeof(struct qlcnic_async_work_list),
-				GFP_ATOMIC);
-		if (entry == NULL)
-			return NULL;
-		list_add_tail(&entry->list, &bc->async_list);
-	}
+	entry->cmd = cmd;
+
+	spin_lock(&bc->queue_lock);
+	list_add_tail(&entry->list, &bc->async_cmd_list);
+	spin_unlock(&bc->queue_lock);
 
 	return entry;
 }
 
 static void qlcnic_sriov_schedule_async_cmd(struct qlcnic_back_channel *bc,
-					    work_func_t func, void *data,
 					    struct qlcnic_cmd_args *cmd)
 {
-	struct qlcnic_async_work_list *entry = NULL;
+	struct qlcnic_async_cmd *entry = NULL;
 
-	entry = qlcnic_sriov_get_free_node_async_work(bc);
-	if (!entry)
+	entry = qlcnic_sriov_alloc_async_cmd(bc, cmd);
+	if (!entry) {
+		qlcnic_free_mbx_args(cmd);
+		kfree(cmd);
 		return;
+	}
 
-	entry->ptr = data;
-	entry->cmd = cmd;
-	INIT_WORK(&entry->work, func);
-	queue_work(bc->bc_async_wq, &entry->work);
+	queue_work(bc->bc_async_wq, &bc->vf_async_work);
 }
 
 static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *adapter,
@@ -1649,8 +1664,8 @@ static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *adapter,
 	if (adapter->need_fw_reset)
 		return -EIO;
 
-	qlcnic_sriov_schedule_async_cmd(bc, qlcnic_sriov_handle_async_issue_cmd,
-					adapter, cmd);
+	qlcnic_sriov_schedule_async_cmd(bc, cmd);
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From fc4ca987f7cc0cb7ea8cb8bb673447939a84bb07 Mon Sep 17 00:00:00 2001
From: Manish Chopra <manish.chopra@qlogic.com>
Date: Wed, 3 Aug 2016 04:02:03 -0400
Subject: qlcnic: fix napi budget alteration

Driver modifies the supplied NAPI budget in qlcnic_83xx_msix_tx_poll()
function. Instead, it should use the budget as it is.

Signed-off-by: Manish Chopra <manish.chopra@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index 87c642d..fedd736 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -102,7 +102,6 @@
 #define QLCNIC_RESPONSE_DESC	0x05
 #define QLCNIC_LRO_DESC  	0x12
 
-#define QLCNIC_TX_POLL_BUDGET		128
 #define QLCNIC_TCP_HDR_SIZE		20
 #define QLCNIC_TCP_TS_OPTION_SIZE	12
 #define QLCNIC_FETCH_RING_ID(handle)	((handle) >> 63)
@@ -2008,7 +2007,6 @@ static int qlcnic_83xx_msix_tx_poll(struct napi_struct *napi, int budget)
 	struct qlcnic_host_tx_ring *tx_ring;
 	struct qlcnic_adapter *adapter;
 
-	budget = QLCNIC_TX_POLL_BUDGET;
 	tx_ring = container_of(napi, struct qlcnic_host_tx_ring, napi);
 	adapter = tx_ring->adapter;
 	work_done = qlcnic_process_cmd_ring(adapter, tx_ring, budget);
-- 
cgit v0.10.2


From b8b2372de9cc00d5ed667c7b8db29b6cfbf037f5 Mon Sep 17 00:00:00 2001
From: Manish Chopra <manish.chopra@qlogic.com>
Date: Wed, 3 Aug 2016 04:02:04 -0400
Subject: qlcnic: Update version to 5.3.65

Signed-off-by: Manish Chopra <manish.chopra@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index fd973f4..49bad00 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -37,8 +37,8 @@
 
 #define _QLCNIC_LINUX_MAJOR 5
 #define _QLCNIC_LINUX_MINOR 3
-#define _QLCNIC_LINUX_SUBVERSION 64
-#define QLCNIC_LINUX_VERSIONID  "5.3.64"
+#define _QLCNIC_LINUX_SUBVERSION 65
+#define QLCNIC_LINUX_VERSIONID  "5.3.65"
 #define QLCNIC_DRV_IDC_VER  0x01
 #define QLCNIC_DRIVER_VERSION  ((_QLCNIC_LINUX_MAJOR << 16) |\
 		 (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION))
-- 
cgit v0.10.2


From 6b07d9ca9b5363dda959b9582a3fc9c0b89ef3b5 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 2 Aug 2016 11:13:41 +0200
Subject: mac80211: fix purging multicast PS buffer queue

The code currently assumes that buffered multicast PS frames don't have
a pending ACK frame for tx status reporting.
However, hostapd sends a broadcast deauth frame on teardown for which tx
status is requested. This can lead to the "Have pending ack frames"
warning on module reload.
Fix this by using ieee80211_free_txskb/ieee80211_purge_tx_queue.

Cc: stable@vger.kernel.org
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 47e99ab8..543b1d4 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -869,7 +869,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
 
 	/* free all potentially still buffered bcast frames */
 	local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf);
-	skb_queue_purge(&sdata->u.ap.ps.bc_buf);
+	ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf);
 
 	mutex_lock(&local->mtx);
 	ieee80211_vif_copy_chanctx_to_vlans(sdata, true);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 91461c4..5023966 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -368,7 +368,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
 		skb = skb_dequeue(&ps->bc_buf);
 		if (skb) {
 			purged++;
-			dev_kfree_skb(skb);
+			ieee80211_free_txskb(&local->hw, skb);
 		}
 		total += skb_queue_len(&ps->bc_buf);
 	}
@@ -451,7 +451,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
 	if (skb_queue_len(&ps->bc_buf) >= AP_MAX_BC_BUFFER) {
 		ps_dbg(tx->sdata,
 		       "BC TX buffer full - dropping the oldest frame\n");
-		dev_kfree_skb(skb_dequeue(&ps->bc_buf));
+		ieee80211_free_txskb(&tx->local->hw, skb_dequeue(&ps->bc_buf));
 	} else
 		tx->local->total_ps_buffered++;
 
@@ -4275,7 +4275,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 			sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev);
 		if (!ieee80211_tx_prepare(sdata, &tx, NULL, skb))
 			break;
-		dev_kfree_skb_any(skb);
+		ieee80211_free_txskb(hw, skb);
 	}
 
 	info = IEEE80211_SKB_CB(skb);
-- 
cgit v0.10.2


From 71f2c3470fca51be27f6fc5975675f7e1c3b91e5 Mon Sep 17 00:00:00 2001
From: Masashi Honma <masashi.honma@gmail.com>
Date: Tue, 2 Aug 2016 17:16:57 +0900
Subject: mac80211: End the MPSP even if EOSP frame was not acked

If QoS frame with EOSP (end of service period) subfield=1 sent by local
peer was not acked by remote peer, local peer did not end the MPSP. This
prevents local peer from going to DOZE state. And if the remote peer
goes away without closing connection, local peer continues AWAKE state
and wastes battery.

Signed-off-by: Masashi Honma <masashi.honma@gmail.com>
Acked-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index c6d5c72..a2a6826 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -771,6 +771,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 			clear_sta_flag(sta, WLAN_STA_SP);
 
 		acked = !!(info->flags & IEEE80211_TX_STAT_ACK);
+
+		/* mesh Peer Service Period support */
+		if (ieee80211_vif_is_mesh(&sta->sdata->vif) &&
+		    ieee80211_is_data_qos(fc))
+			ieee80211_mpsp_trigger_process(
+				ieee80211_get_qos_ctl(hdr), sta, true, acked);
+
 		if (!acked && test_sta_flag(sta, WLAN_STA_PS_STA)) {
 			/*
 			 * The STA is in power save mode, so assume
@@ -781,13 +788,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 			return;
 		}
 
-		/* mesh Peer Service Period support */
-		if (ieee80211_vif_is_mesh(&sta->sdata->vif) &&
-		    ieee80211_is_data_qos(fc))
-			ieee80211_mpsp_trigger_process(
-					ieee80211_get_qos_ctl(hdr),
-					sta, true, acked);
-
 		if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) &&
 		    (ieee80211_is_data(hdr->frame_control)) &&
 		    (rates_idx != -1))
-- 
cgit v0.10.2


From 9757235f451c27deaa88925399f070ff6fcea832 Mon Sep 17 00:00:00 2001
From: Masashi Honma <masashi.honma@gmail.com>
Date: Wed, 3 Aug 2016 10:07:44 +0900
Subject: nl80211: correct checks for NL80211_MESHCONF_HT_OPMODE value

Previously, NL80211_MESHCONF_HT_OPMODE validation rejected correct
flag combinations, e.g. IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED |
IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT.

Doing just a range-check allows setting flags that don't exist (0x8)
and invalid flag combinations.

Implements some checks based on IEEE 802.11 2012 8.4.2.59 "HT
Operation element".

Signed-off-by: Masashi Honma <masashi.honma@gmail.com>
[reword commit message, simplify a bit]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 46417f9..f02653a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5380,6 +5380,7 @@ static int nl80211_parse_mesh_config(struct genl_info *info,
 {
 	struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1];
 	u32 mask = 0;
+	u16 ht_opmode;
 
 #define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, min, max, mask, attr, fn) \
 do {									    \
@@ -5471,9 +5472,36 @@ do {									    \
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0,
 				  mask, NL80211_MESHCONF_RSSI_THRESHOLD,
 				  nl80211_check_s32);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16,
-				  mask, NL80211_MESHCONF_HT_OPMODE,
-				  nl80211_check_u16);
+	/*
+	 * Check HT operation mode based on
+	 * IEEE 802.11 2012 8.4.2.59 HT Operation element.
+	 */
+	if (tb[NL80211_MESHCONF_HT_OPMODE]) {
+		ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]);
+
+		if (ht_opmode & ~(IEEE80211_HT_OP_MODE_PROTECTION |
+				  IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT |
+				  IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
+			return -EINVAL;
+
+		if ((ht_opmode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) &&
+		    (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
+			return -EINVAL;
+
+		switch (ht_opmode & IEEE80211_HT_OP_MODE_PROTECTION) {
+		case IEEE80211_HT_OP_MODE_PROTECTION_NONE:
+		case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ:
+			if (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)
+				return -EINVAL;
+			break;
+		case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER:
+		case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED:
+			if (!(ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
+				return -EINVAL;
+			break;
+		}
+		cfg->ht_opmode = ht_opmode;
+	}
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout,
 				  1, 65535, mask,
 				  NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT,
-- 
cgit v0.10.2


From 2439ca0402091badb24415e1b073ba12b34ba423 Mon Sep 17 00:00:00 2001
From: Maxim Altshul <maxim.altshul@ti.com>
Date: Thu, 4 Aug 2016 15:43:04 +0300
Subject: mac80211: Add ieee80211_hw pointer to get_expected_throughput

The variable is added to allow the driver an easy access to
it's own hw->priv when the op is invoked.

This fixes a crash in wlcore because it was relying on a
station pointer that wasn't initialized yet. It's the wrong
way to fix the crash, but it solves the problem for now and
it does make sense to have the hw pointer here.

Signed-off-by: Maxim Altshul <maxim.altshul@ti.com>
[rewrite commit message, fix indentation]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 1d68916..9e1f2d9 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -5700,10 +5700,11 @@ out:
 	mutex_unlock(&wl->mutex);
 }
 
-static u32 wlcore_op_get_expected_throughput(struct ieee80211_sta *sta)
+static u32 wlcore_op_get_expected_throughput(struct ieee80211_hw *hw,
+					     struct ieee80211_sta *sta)
 {
 	struct wl1271_station *wl_sta = (struct wl1271_station *)sta->drv_priv;
-	struct wl1271 *wl = wl_sta->wl;
+	struct wl1271 *wl = hw->priv;
 	u8 hlid = wl_sta->hlid;
 
 	/* return in units of Kbps */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index b4faadb..cca510a 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3620,7 +3620,8 @@ struct ieee80211_ops {
 
 	int (*join_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
 	void (*leave_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
-	u32 (*get_expected_throughput)(struct ieee80211_sta *sta);
+	u32 (*get_expected_throughput)(struct ieee80211_hw *hw,
+				       struct ieee80211_sta *sta);
 	int (*get_txpower)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			   int *dbm);
 
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 184473c..ba5fc1f 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1094,7 +1094,7 @@ static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
 
 	trace_drv_get_expected_throughput(sta);
 	if (local->ops->get_expected_throughput)
-		ret = local->ops->get_expected_throughput(sta);
+		ret = local->ops->get_expected_throughput(&local->hw, sta);
 	trace_drv_return_u32(local, ret);
 
 	return ret;
-- 
cgit v0.10.2


From 5ef9f289c4e698054e5687edb54f0da3cdc9173a Mon Sep 17 00:00:00 2001
From: Ian Wienand <iwienand@redhat.com>
Date: Wed, 3 Aug 2016 15:44:57 +1000
Subject: OVS: Ignore negative headroom value

net_device->ndo_set_rx_headroom (introduced in
871b642adebe300be2e50aa5f65a418510f636ec) says

  "Setting a negtaive value reset the rx headroom
   to the default value".

It seems that the OVS implementation in
3a927bc7cf9d0fbe8f4a8189dd5f8440228f64e7 overlooked this and sets
dev->needed_headroom unconditionally.

This doesn't have an immediate effect, but can mess up later
LL_RESERVED_SPACE calculations, such as done in
net/ipv6/mcast.c:mld_newpack.  For reference, this issue was found
from a skb_panic raised there after the length calculations had given
the wrong result.

Note the other current users of this interface
(drivers/net/tun.c:tun_set_headroom and
drivers/net/veth.c:veth_set_rx_headroom) are both checking this
correctly thus need no modification.

Thanks to Ben for some pointers from the crash dumps!

Cc: Benjamin Poirier <bpoirier@suse.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1361414
Signed-off-by: Ian Wienand <iwienand@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 434e04c..95c3614 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -140,7 +140,7 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 
 static void internal_set_rx_headroom(struct net_device *dev, int new_hr)
 {
-	dev->needed_headroom = new_hr;
+	dev->needed_headroom = new_hr < 0 ? 0 : new_hr;
 }
 
 static const struct net_device_ops internal_dev_netdev_ops = {
-- 
cgit v0.10.2


From 54447f1ad73414ebb052e9a33d079cabed3a03e8 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyj.lk@gmail.com>
Date: Wed, 3 Aug 2016 10:58:35 +0000
Subject: net: arc_emac: add missing of_node_put() in arc_emac_probe()

commit a94efbd7cc45 ("ethernet: arc: emac_main: add missing of_node_put
after calling of_parse_phandle") added missing of_node_put after calling
of_parse_phandle, but missing the devm_ioremap_resource() error handling
case.

Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com>
Reviewed-by: Peter Chen <peter.chen@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index 4bff0f3..b0da969 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -771,8 +771,10 @@ int arc_emac_probe(struct net_device *ndev, int interface)
 	priv->dev = dev;
 
 	priv->regs = devm_ioremap_resource(dev, &res_regs);
-	if (IS_ERR(priv->regs))
-		return PTR_ERR(priv->regs);
+	if (IS_ERR(priv->regs)) {
+		err = PTR_ERR(priv->regs);
+		goto out_put_node;
+	}
 
 	dev_dbg(dev, "Registers base address is 0x%p\n", priv->regs);
 
-- 
cgit v0.10.2


From 372ee16386bbf6dc5eeb0387e1ede963debba82a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 3 Aug 2016 14:11:40 +0100
Subject: rxrpc: Fix races between skb free, ACK generation and replying

Inside the kafs filesystem it is possible to occasionally have a call
processed and terminated before we've had a chance to check whether we need
to clean up the rx queue for that call because afs_send_simple_reply() ends
the call when it is done, but this is done in a workqueue item that might
happen to run to completion before afs_deliver_to_call() completes.

Further, it is possible for rxrpc_kernel_send_data() to be called to send a
reply before the last request-phase data skb is released.  The rxrpc skb
destructor is where the ACK processing is done and the call state is
advanced upon release of the last skb.  ACK generation is also deferred to
a work item because it's possible that the skb destructor is not called in
a context where kernel_sendmsg() can be invoked.

To this end, the following changes are made:

 (1) kernel_rxrpc_data_consumed() is added.  This should be called whenever
     an skb is emptied so as to crank the ACK and call states.  This does
     not release the skb, however.  kernel_rxrpc_free_skb() must now be
     called to achieve that.  These together replace
     rxrpc_kernel_data_delivered().

 (2) kernel_rxrpc_data_consumed() is wrapped by afs_data_consumed().

     This makes afs_deliver_to_call() easier to work as the skb can simply
     be discarded unconditionally here without trying to work out what the
     return value of the ->deliver() function means.

     The ->deliver() functions can, via afs_data_complete(),
     afs_transfer_reply() and afs_extract_data() mark that an skb has been
     consumed (thereby cranking the state) without the need to
     conditionally free the skb to make sure the state is correct on an
     incoming call for when the call processor tries to send the reply.

 (3) rxrpc_recvmsg() now has to call kernel_rxrpc_data_consumed() when it
     has finished with a packet and MSG_PEEK isn't set.

 (4) rxrpc_packet_destructor() no longer calls rxrpc_hard_ACK_data().

     Because of this, we no longer need to clear the destructor and put the
     call before we free the skb in cases where we don't want the ACK/call
     state to be cranked.

 (5) The ->deliver() call-type callbacks are made to return -EAGAIN rather
     than 0 if they expect more data (afs_extract_data() returns -EAGAIN to
     the delivery function already), and the caller is now responsible for
     producing an abort if that was the last packet.

 (6) There are many bits of unmarshalling code where:

 		ret = afs_extract_data(call, skb, last, ...);
		switch (ret) {
		case 0:		break;
		case -EAGAIN:	return 0;
		default:	return ret;
		}

     is to be found.  As -EAGAIN can now be passed back to the caller, we
     now just return if ret < 0:

 		ret = afs_extract_data(call, skb, last, ...);
		if (ret < 0)
			return ret;

 (7) Checks for trailing data and empty final data packets has been
     consolidated as afs_data_complete().  So:

		if (skb->len > 0)
			return -EBADMSG;
		if (!last)
			return 0;

     becomes:

		ret = afs_data_complete(call, skb, last);
		if (ret < 0)
			return ret;

 (8) afs_transfer_reply() now checks the amount of data it has against the
     amount of data desired and the amount of data in the skb and returns
     an error to induce an abort if we don't get exactly what we want.

Without these changes, the following oops can occasionally be observed,
particularly if some printks are inserted into the delivery path:

general protection fault: 0000 [#1] SMP
Modules linked in: kafs(E) af_rxrpc(E) [last unloaded: af_rxrpc]
CPU: 0 PID: 1305 Comm: kworker/u8:3 Tainted: G            E   4.7.0-fsdevel+ #1303
Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014
Workqueue: kafsd afs_async_workfn [kafs]
task: ffff88040be041c0 ti: ffff88040c070000 task.ti: ffff88040c070000
RIP: 0010:[<ffffffff8108fd3c>]  [<ffffffff8108fd3c>] __lock_acquire+0xcf/0x15a1
RSP: 0018:ffff88040c073bc0  EFLAGS: 00010002
RAX: 6b6b6b6b6b6b6b6b RBX: 0000000000000000 RCX: ffff88040d29a710
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88040d29a710
RBP: ffff88040c073c70 R08: 0000000000000001 R09: 0000000000000001
R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: ffff88040be041c0 R15: ffffffff814c928f
FS:  0000000000000000(0000) GS:ffff88041fa00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fa4595f4750 CR3: 0000000001c14000 CR4: 00000000001406f0
Stack:
 0000000000000006 000000000be04930 0000000000000000 ffff880400000000
 ffff880400000000 ffffffff8108f847 ffff88040be041c0 ffffffff81050446
 ffff8803fc08a920 ffff8803fc08a958 ffff88040be041c0 ffff88040c073c38
Call Trace:
 [<ffffffff8108f847>] ? mark_held_locks+0x5e/0x74
 [<ffffffff81050446>] ? __local_bh_enable_ip+0x9b/0xa1
 [<ffffffff8108f9ca>] ? trace_hardirqs_on_caller+0x16d/0x189
 [<ffffffff810915f4>] lock_acquire+0x122/0x1b6
 [<ffffffff810915f4>] ? lock_acquire+0x122/0x1b6
 [<ffffffff814c928f>] ? skb_dequeue+0x18/0x61
 [<ffffffff81609dbf>] _raw_spin_lock_irqsave+0x35/0x49
 [<ffffffff814c928f>] ? skb_dequeue+0x18/0x61
 [<ffffffff814c928f>] skb_dequeue+0x18/0x61
 [<ffffffffa009aa92>] afs_deliver_to_call+0x344/0x39d [kafs]
 [<ffffffffa009ab37>] afs_process_async_call+0x4c/0xd5 [kafs]
 [<ffffffffa0099e9c>] afs_async_workfn+0xe/0x10 [kafs]
 [<ffffffff81063a3a>] process_one_work+0x29d/0x57c
 [<ffffffff81064ac2>] worker_thread+0x24a/0x385
 [<ffffffff81064878>] ? rescuer_thread+0x2d0/0x2d0
 [<ffffffff810696f5>] kthread+0xf3/0xfb
 [<ffffffff8160a6ff>] ret_from_fork+0x1f/0x40
 [<ffffffff81069602>] ? kthread_create_on_node+0x1cf/0x1cf

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 16a924c..70c926a 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -790,13 +790,12 @@ The kernel interface functions are as follows:
      Data messages can have their contents extracted with the usual bunch of
      socket buffer manipulation functions.  A data message can be determined to
      be the last one in a sequence with rxrpc_kernel_is_data_last().  When a
-     data message has been used up, rxrpc_kernel_data_delivered() should be
-     called on it..
+     data message has been used up, rxrpc_kernel_data_consumed() should be
+     called on it.
 
-     Non-data messages should be handled to rxrpc_kernel_free_skb() to dispose
-     of.  It is possible to get extra refs on all types of message for later
-     freeing, but this may pin the state of a call until the message is finally
-     freed.
+     Messages should be handled to rxrpc_kernel_free_skb() to dispose of.  It
+     is possible to get extra refs on all types of message for later freeing,
+     but this may pin the state of a call until the message is finally freed.
 
  (*) Accept an incoming call.
 
@@ -821,12 +820,14 @@ The kernel interface functions are as follows:
      Other errors may be returned if the call had been aborted (-ECONNABORTED)
      or had timed out (-ETIME).
 
- (*) Record the delivery of a data message and free it.
+ (*) Record the delivery of a data message.
 
-	void rxrpc_kernel_data_delivered(struct sk_buff *skb);
+	void rxrpc_kernel_data_consumed(struct rxrpc_call *call,
+					struct sk_buff *skb);
 
-     This is used to record a data message as having been delivered and to
-     update the ACK state for the call.  The socket buffer will be freed.
+     This is used to record a data message as having been consumed and to
+     update the ACK state for the call.  The message must still be passed to
+     rxrpc_kernel_free_skb() for disposal by the caller.
 
  (*) Free a message.
 
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 4b0eff6..85737e9 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -189,11 +189,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
 	case 1:
 		_debug("extract FID count");
 		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("FID count: %u", call->count);
@@ -210,11 +207,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
 		_debug("extract FID array");
 		ret = afs_extract_data(call, skb, last, call->buffer,
 				       call->count * 3 * 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		if (ret < 0)
+			return ret;
 
 		_debug("unmarshall FID array");
 		call->request = kcalloc(call->count,
@@ -239,11 +233,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
 	case 3:
 		_debug("extract CB count");
 		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		if (ret < 0)
+			return ret;
 
 		tmp = ntohl(call->tmp);
 		_debug("CB count: %u", tmp);
@@ -258,11 +249,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
 		_debug("extract CB array");
 		ret = afs_extract_data(call, skb, last, call->request,
 				       call->count * 3 * 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		if (ret < 0)
+			return ret;
 
 		_debug("unmarshall CB array");
 		cb = call->request;
@@ -278,9 +266,9 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
 		call->unmarshall++;
 
 	case 5:
-		_debug("trailer");
-		if (skb->len != 0)
-			return -EBADMSG;
+		ret = afs_data_complete(call, skb, last);
+		if (ret < 0)
+			return ret;
 
 		/* Record that the message was unmarshalled successfully so
 		 * that the call destructor can know do the callback breaking
@@ -294,8 +282,6 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
 		break;
 	}
 
-	if (!last)
-		return 0;
 
 	call->state = AFS_CALL_REPLYING;
 
@@ -335,13 +321,13 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
 {
 	struct afs_server *server;
 	struct in_addr addr;
+	int ret;
 
 	_enter(",{%u},%d", skb->len, last);
 
-	if (skb->len > 0)
-		return -EBADMSG;
-	if (!last)
-		return 0;
+	ret = afs_data_complete(call, skb, last);
+	if (ret < 0)
+		return ret;
 
 	/* no unmarshalling required */
 	call->state = AFS_CALL_REPLYING;
@@ -371,8 +357,10 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call,
 
 	_enter(",{%u},%d", skb->len, last);
 
+	/* There are some arguments that we ignore */
+	afs_data_consumed(call, skb);
 	if (!last)
-		return 0;
+		return -EAGAIN;
 
 	/* no unmarshalling required */
 	call->state = AFS_CALL_REPLYING;
@@ -408,12 +396,13 @@ static void SRXAFSCB_Probe(struct work_struct *work)
 static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
 				bool last)
 {
+	int ret;
+
 	_enter(",{%u},%d", skb->len, last);
 
-	if (skb->len > 0)
-		return -EBADMSG;
-	if (!last)
-		return 0;
+	ret = afs_data_complete(call, skb, last);
+	if (ret < 0)
+		return ret;
 
 	/* no unmarshalling required */
 	call->state = AFS_CALL_REPLYING;
@@ -460,10 +449,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
 
 	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
 
-	if (skb->len > 0)
-		return -EBADMSG;
-	if (!last)
-		return 0;
+	ret = afs_data_complete(call, skb, last);
+	if (ret < 0)
+		return ret;
 
 	switch (call->unmarshall) {
 	case 0:
@@ -509,8 +497,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
 		break;
 	}
 
-	if (!last)
-		return 0;
+	ret = afs_data_complete(call, skb, last);
+	if (ret < 0)
+		return ret;
 
 	call->state = AFS_CALL_REPLYING;
 
@@ -588,12 +577,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
 static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
 						 struct sk_buff *skb, bool last)
 {
+	int ret;
+
 	_enter(",{%u},%d", skb->len, last);
 
-	if (skb->len > 0)
-		return -EBADMSG;
-	if (!last)
-		return 0;
+	ret = afs_data_complete(call, skb, last);
+	if (ret < 0)
+		return ret;
 
 	/* no unmarshalling required */
 	call->state = AFS_CALL_REPLYING;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index c2e930e..9312b92 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -240,15 +240,13 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call,
 {
 	struct afs_vnode *vnode = call->reply;
 	const __be32 *bp;
+	int ret;
 
 	_enter(",,%u", last);
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call, skb, last);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -335,11 +333,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
 	case 1:
 		_debug("extract data length (MSW)");
 		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("DATA length MSW: %u", call->count);
@@ -353,11 +348,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
 	case 2:
 		_debug("extract data length");
 		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("DATA length: %u", call->count);
@@ -375,11 +367,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
 			ret = afs_extract_data(call, skb, last, buffer,
 					       call->count);
 			kunmap_atomic(buffer);
-			switch (ret) {
-			case 0:		break;
-			case -EAGAIN:	return 0;
-			default:	return ret;
-			}
+			if (ret < 0)
+				return ret;
 		}
 
 		call->offset = 0;
@@ -389,11 +378,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
 	case 4:
 		ret = afs_extract_data(call, skb, last, call->buffer,
 				       (21 + 3 + 6) * 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		if (ret < 0)
+			return ret;
 
 		bp = call->buffer;
 		xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
@@ -405,15 +391,12 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
 		call->unmarshall++;
 
 	case 5:
-		_debug("trailer");
-		if (skb->len != 0)
-			return -EBADMSG;
+		ret = afs_data_complete(call, skb, last);
+		if (ret < 0)
+			return ret;
 		break;
 	}
 
-	if (!last)
-		return 0;
-
 	if (call->count < PAGE_SIZE) {
 		_debug("clear");
 		page = call->reply3;
@@ -537,9 +520,8 @@ static int afs_deliver_fs_give_up_callbacks(struct afs_call *call,
 {
 	_enter(",{%u},%d", skb->len, last);
 
-	if (skb->len > 0)
-		return -EBADMSG; /* shouldn't be any reply data */
-	return 0;
+	/* shouldn't be any reply data */
+	return afs_data_complete(call, skb, last);
 }
 
 /*
@@ -622,15 +604,13 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call,
 {
 	struct afs_vnode *vnode = call->reply;
 	const __be32 *bp;
+	int ret;
 
 	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call, skb, last);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -721,15 +701,13 @@ static int afs_deliver_fs_remove(struct afs_call *call,
 {
 	struct afs_vnode *vnode = call->reply;
 	const __be32 *bp;
+	int ret;
 
 	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call, skb, last);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -804,15 +782,13 @@ static int afs_deliver_fs_link(struct afs_call *call,
 {
 	struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
 	const __be32 *bp;
+	int ret;
 
 	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call, skb, last);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -892,15 +868,13 @@ static int afs_deliver_fs_symlink(struct afs_call *call,
 {
 	struct afs_vnode *vnode = call->reply;
 	const __be32 *bp;
+	int ret;
 
 	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call, skb, last);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -999,15 +973,13 @@ static int afs_deliver_fs_rename(struct afs_call *call,
 {
 	struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
 	const __be32 *bp;
+	int ret;
 
 	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call, skb, last);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -1105,20 +1077,13 @@ static int afs_deliver_fs_store_data(struct afs_call *call,
 {
 	struct afs_vnode *vnode = call->reply;
 	const __be32 *bp;
+	int ret;
 
 	_enter(",,%u", last);
 
-	afs_transfer_reply(call, skb);
-	if (!last) {
-		_leave(" = 0 [more]");
-		return 0;
-	}
-
-	if (call->reply_size != call->reply_max) {
-		_leave(" = -EBADMSG [%u != %u]",
-		       call->reply_size, call->reply_max);
-		return -EBADMSG;
-	}
+	ret = afs_transfer_reply(call, skb, last);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -1292,20 +1257,13 @@ static int afs_deliver_fs_store_status(struct afs_call *call,
 	afs_dataversion_t *store_version;
 	struct afs_vnode *vnode = call->reply;
 	const __be32 *bp;
+	int ret;
 
 	_enter(",,%u", last);
 
-	afs_transfer_reply(call, skb);
-	if (!last) {
-		_leave(" = 0 [more]");
-		return 0;
-	}
-
-	if (call->reply_size != call->reply_max) {
-		_leave(" = -EBADMSG [%u != %u]",
-		       call->reply_size, call->reply_max);
-		return -EBADMSG;
-	}
+	ret = afs_transfer_reply(call, skb, last);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	store_version = NULL;
@@ -1504,11 +1462,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
 		_debug("extract status");
 		ret = afs_extract_data(call, skb, last, call->buffer,
 				       12 * 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		if (ret < 0)
+			return ret;
 
 		bp = call->buffer;
 		xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2);
@@ -1518,11 +1473,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
 		/* extract the volume name length */
 	case 2:
 		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("volname length: %u", call->count);
@@ -1537,11 +1489,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
 		if (call->count > 0) {
 			ret = afs_extract_data(call, skb, last, call->reply3,
 					       call->count);
-			switch (ret) {
-			case 0:		break;
-			case -EAGAIN:	return 0;
-			default:	return ret;
-			}
+			if (ret < 0)
+				return ret;
 		}
 
 		p = call->reply3;
@@ -1561,11 +1510,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
 	case 4:
 		ret = afs_extract_data(call, skb, last, call->buffer,
 				       call->count);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		if (ret < 0)
+			return ret;
 
 		call->offset = 0;
 		call->unmarshall++;
@@ -1574,11 +1520,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
 		/* extract the offline message length */
 	case 5:
 		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("offline msg length: %u", call->count);
@@ -1593,11 +1536,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
 		if (call->count > 0) {
 			ret = afs_extract_data(call, skb, last, call->reply3,
 					       call->count);
-			switch (ret) {
-			case 0:		break;
-			case -EAGAIN:	return 0;
-			default:	return ret;
-			}
+			if (ret < 0)
+				return ret;
 		}
 
 		p = call->reply3;
@@ -1617,11 +1557,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
 	case 7:
 		ret = afs_extract_data(call, skb, last, call->buffer,
 				       call->count);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		if (ret < 0)
+			return ret;
 
 		call->offset = 0;
 		call->unmarshall++;
@@ -1630,11 +1567,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
 		/* extract the message of the day length */
 	case 8:
 		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("motd length: %u", call->count);
@@ -1649,11 +1583,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
 		if (call->count > 0) {
 			ret = afs_extract_data(call, skb, last, call->reply3,
 					       call->count);
-			switch (ret) {
-			case 0:		break;
-			case -EAGAIN:	return 0;
-			default:	return ret;
-			}
+			if (ret < 0)
+				return ret;
 		}
 
 		p = call->reply3;
@@ -1673,26 +1604,20 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
 	case 10:
 		ret = afs_extract_data(call, skb, last, call->buffer,
 				       call->count);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		if (ret < 0)
+			return ret;
 
 		call->offset = 0;
 		call->unmarshall++;
 	no_motd_padding:
 
 	case 11:
-		_debug("trailer %d", skb->len);
-		if (skb->len != 0)
-			return -EBADMSG;
+		ret = afs_data_complete(call, skb, last);
+		if (ret < 0)
+			return ret;
 		break;
 	}
 
-	if (!last)
-		return 0;
-
 	_leave(" = 0 [done]");
 	return 0;
 }
@@ -1764,15 +1689,13 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call,
 				    struct sk_buff *skb, bool last)
 {
 	const __be32 *bp;
+	int ret;
 
 	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call, skb, last);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 71d5982..df976b2 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -609,17 +609,29 @@ extern void afs_proc_cell_remove(struct afs_cell *);
  */
 extern int afs_open_socket(void);
 extern void afs_close_socket(void);
+extern void afs_data_consumed(struct afs_call *, struct sk_buff *);
 extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t,
 			 const struct afs_wait_mode *);
 extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
 					    size_t, size_t);
 extern void afs_flat_call_destructor(struct afs_call *);
-extern void afs_transfer_reply(struct afs_call *, struct sk_buff *);
+extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool);
 extern void afs_send_empty_reply(struct afs_call *);
 extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
 extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
 			    size_t);
 
+static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb,
+				    bool last)
+{
+	if (skb->len > 0)
+		return -EBADMSG;
+	afs_data_consumed(call, skb);
+	if (!last)
+		return -EAGAIN;
+	return 0;
+}
+
 /*
  * security.c
  */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 4832de84..14d04c8 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -150,10 +150,9 @@ void afs_close_socket(void)
 }
 
 /*
- * note that the data in a socket buffer is now delivered and that the buffer
- * should be freed
+ * Note that the data in a socket buffer is now consumed.
  */
-static void afs_data_delivered(struct sk_buff *skb)
+void afs_data_consumed(struct afs_call *call, struct sk_buff *skb)
 {
 	if (!skb) {
 		_debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs));
@@ -161,9 +160,7 @@ static void afs_data_delivered(struct sk_buff *skb)
 	} else {
 		_debug("DLVR %p{%u} [%d]",
 		       skb, skb->mark, atomic_read(&afs_outstanding_skbs));
-		if (atomic_dec_return(&afs_outstanding_skbs) == -1)
-			BUG();
-		rxrpc_kernel_data_delivered(skb);
+		rxrpc_kernel_data_consumed(call->rxcall, skb);
 	}
 }
 
@@ -489,9 +486,15 @@ static void afs_deliver_to_call(struct afs_call *call)
 			last = rxrpc_kernel_is_data_last(skb);
 			ret = call->type->deliver(call, skb, last);
 			switch (ret) {
+			case -EAGAIN:
+				if (last) {
+					_debug("short data");
+					goto unmarshal_error;
+				}
+				break;
 			case 0:
-				if (last &&
-				    call->state == AFS_CALL_AWAIT_REPLY)
+				ASSERT(last);
+				if (call->state == AFS_CALL_AWAIT_REPLY)
 					call->state = AFS_CALL_COMPLETE;
 				break;
 			case -ENOTCONN:
@@ -501,6 +504,7 @@ static void afs_deliver_to_call(struct afs_call *call)
 				abort_code = RX_INVALID_OPERATION;
 				goto do_abort;
 			default:
+			unmarshal_error:
 				abort_code = RXGEN_CC_UNMARSHAL;
 				if (call->state != AFS_CALL_AWAIT_REPLY)
 					abort_code = RXGEN_SS_UNMARSHAL;
@@ -511,9 +515,7 @@ static void afs_deliver_to_call(struct afs_call *call)
 				call->state = AFS_CALL_ERROR;
 				break;
 			}
-			afs_data_delivered(skb);
-			skb = NULL;
-			continue;
+			break;
 		case RXRPC_SKB_MARK_FINAL_ACK:
 			_debug("Rcv ACK");
 			call->state = AFS_CALL_COMPLETE;
@@ -685,15 +687,35 @@ static void afs_process_async_call(struct afs_call *call)
 }
 
 /*
- * empty a socket buffer into a flat reply buffer
+ * Empty a socket buffer into a flat reply buffer.
  */
-void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb)
+int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last)
 {
 	size_t len = skb->len;
 
-	if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, len) < 0)
-		BUG();
-	call->reply_size += len;
+	if (len > call->reply_max - call->reply_size) {
+		_leave(" = -EBADMSG [%zu > %u]",
+		       len, call->reply_max - call->reply_size);
+		return -EBADMSG;
+	}
+
+	if (len > 0) {
+		if (skb_copy_bits(skb, 0, call->buffer + call->reply_size,
+				  len) < 0)
+			BUG();
+		call->reply_size += len;
+	}
+
+	afs_data_consumed(call, skb);
+	if (!last)
+		return -EAGAIN;
+
+	if (call->reply_size != call->reply_max) {
+		_leave(" = -EBADMSG [%u != %u]",
+		       call->reply_size, call->reply_max);
+		return -EBADMSG;
+	}
+	return 0;
 }
 
 /*
@@ -745,7 +767,8 @@ static void afs_collect_incoming_call(struct work_struct *work)
 }
 
 /*
- * grab the operation ID from an incoming cache manager call
+ * Grab the operation ID from an incoming cache manager call.  The socket
+ * buffer is discarded on error or if we don't yet have sufficient data.
  */
 static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
 				bool last)
@@ -766,12 +789,9 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
 	call->offset += len;
 
 	if (call->offset < 4) {
-		if (last) {
-			_leave(" = -EBADMSG [op ID short]");
-			return -EBADMSG;
-		}
-		_leave(" = 0 [incomplete]");
-		return 0;
+		afs_data_consumed(call, skb);
+		_leave(" = -EAGAIN");
+		return -EAGAIN;
 	}
 
 	call->state = AFS_CALL_AWAIT_REQUEST;
@@ -855,7 +875,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 }
 
 /*
- * extract a piece of data from the received data socket buffers
+ * Extract a piece of data from the received data socket buffers.
  */
 int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
 		     bool last, void *buf, size_t count)
@@ -873,10 +893,7 @@ int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
 	call->offset += len;
 
 	if (call->offset < count) {
-		if (last) {
-			_leave(" = -EBADMSG [%d < %zu]", call->offset, count);
-			return -EBADMSG;
-		}
+		afs_data_consumed(call, skb);
 		_leave(" = -EAGAIN");
 		return -EAGAIN;
 	}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 340afd0..f94d1ab 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -64,16 +64,13 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call,
 	struct afs_cache_vlocation *entry;
 	__be32 *bp;
 	u32 tmp;
-	int loop;
+	int loop, ret;
 
 	_enter(",,%u", last);
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call, skb, last);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	entry = call->reply;
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index ac1bc3c..7b0f886 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -40,12 +40,12 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
 					   unsigned long,
 					   gfp_t);
 int rxrpc_kernel_send_data(struct rxrpc_call *, struct msghdr *, size_t);
+void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *);
 void rxrpc_kernel_abort_call(struct rxrpc_call *, u32);
 void rxrpc_kernel_end_call(struct rxrpc_call *);
 bool rxrpc_kernel_is_data_last(struct sk_buff *);
 u32 rxrpc_kernel_get_abort_code(struct sk_buff *);
 int rxrpc_kernel_get_error_number(struct sk_buff *);
-void rxrpc_kernel_data_delivered(struct sk_buff *);
 void rxrpc_kernel_free_skb(struct sk_buff *);
 struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long);
 int rxrpc_kernel_reject_call(struct socket *);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 1bb9e7a..ff83fb1 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -425,6 +425,7 @@ struct rxrpc_call {
 	spinlock_t		lock;
 	rwlock_t		state_lock;	/* lock for state transition */
 	atomic_t		usage;
+	atomic_t		skb_count;	/* Outstanding packets on this call */
 	atomic_t		sequence;	/* Tx data packet sequence counter */
 	u32			local_abort;	/* local abort code */
 	u32			remote_abort;	/* remote abort code */
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 0b28321..9bae21e 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -130,6 +130,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
 			call->state = RXRPC_CALL_SERVER_ACCEPTING;
 			list_add_tail(&call->accept_link, &rx->acceptq);
 			rxrpc_get_call(call);
+			atomic_inc(&call->skb_count);
 			nsp = rxrpc_skb(notification);
 			nsp->call = call;
 
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index fc32aa5..f5e9916 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -460,6 +460,7 @@ static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
 	ASSERTCMP(sp->call, ==, NULL);
 	sp->call = call;
 	rxrpc_get_call(call);
+	atomic_inc(&call->skb_count);
 
 	/* insert into the buffer in sequence order */
 	spin_lock_bh(&call->lock);
@@ -734,6 +735,7 @@ all_acked:
 		skb->mark = RXRPC_SKB_MARK_FINAL_ACK;
 		sp->call = call;
 		rxrpc_get_call(call);
+		atomic_inc(&call->skb_count);
 		spin_lock_bh(&call->lock);
 		if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0)
 			BUG();
@@ -793,6 +795,7 @@ static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error,
 		sp->error = error;
 		sp->call = call;
 		rxrpc_get_call(call);
+		atomic_inc(&call->skb_count);
 
 		spin_lock_bh(&call->lock);
 		ret = rxrpc_queue_rcv_skb(call, skb, true, fatal);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 91287c9..c47f14f 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -491,13 +491,6 @@ void rxrpc_release_call(struct rxrpc_call *call)
 		spin_lock_bh(&call->lock);
 		while ((skb = skb_dequeue(&call->rx_queue)) ||
 		       (skb = skb_dequeue(&call->rx_oos_queue))) {
-			sp = rxrpc_skb(skb);
-			if (sp->call) {
-				ASSERTCMP(sp->call, ==, call);
-				rxrpc_put_call(call);
-				sp->call = NULL;
-			}
-			skb->destructor = NULL;
 			spin_unlock_bh(&call->lock);
 
 			_debug("- zap %s %%%u #%u",
@@ -605,6 +598,7 @@ void __rxrpc_put_call(struct rxrpc_call *call)
 
 	if (atomic_dec_and_test(&call->usage)) {
 		_debug("call %d dead", call->debug_id);
+		WARN_ON(atomic_read(&call->skb_count) != 0);
 		ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
 		rxrpc_queue_work(&call->destroyer);
 	}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 991a20d..9e0f58e 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -55,9 +55,6 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
 	if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
 		_debug("already terminated");
 		ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE);
-		skb->destructor = NULL;
-		sp->call = NULL;
-		rxrpc_put_call(call);
 		rxrpc_free_skb(skb);
 		return 0;
 	}
@@ -111,13 +108,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
 	ret = 0;
 
 out:
-	/* release the socket buffer */
-	if (skb) {
-		skb->destructor = NULL;
-		sp->call = NULL;
-		rxrpc_put_call(call);
-		rxrpc_free_skb(skb);
-	}
+	rxrpc_free_skb(skb);
 
 	_leave(" = %d", ret);
 	return ret;
@@ -200,6 +191,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
 
 	sp->call = call;
 	rxrpc_get_call(call);
+	atomic_inc(&call->skb_count);
 	terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
 		    !(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
 	ret = rxrpc_queue_rcv_skb(call, skb, false, terminal);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index a3fa2ed..9ed66d5 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -203,6 +203,9 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 		}
 
 		/* we transferred the whole data packet */
+		if (!(flags & MSG_PEEK))
+			rxrpc_kernel_data_consumed(call, skb);
+
 		if (sp->hdr.flags & RXRPC_LAST_PACKET) {
 			_debug("last");
 			if (rxrpc_conn_is_client(call->conn)) {
@@ -360,28 +363,6 @@ wait_error:
 }
 
 /**
- * rxrpc_kernel_data_delivered - Record delivery of data message
- * @skb: Message holding data
- *
- * Record the delivery of a data message.  This permits RxRPC to keep its
- * tracking correct.  The socket buffer will be deleted.
- */
-void rxrpc_kernel_data_delivered(struct sk_buff *skb)
-{
-	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-	struct rxrpc_call *call = sp->call;
-
-	ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
-	ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
-	call->rx_data_recv = sp->hdr.seq;
-
-	ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
-	rxrpc_free_skb(skb);
-}
-
-EXPORT_SYMBOL(rxrpc_kernel_data_delivered);
-
-/**
  * rxrpc_kernel_is_data_last - Determine if data message is last one
  * @skb: Message holding data
  *
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index eee0cfd9..06c51d4 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -98,11 +98,39 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
 	spin_unlock_bh(&call->lock);
 }
 
+/**
+ * rxrpc_kernel_data_consumed - Record consumption of data message
+ * @call: The call to which the message pertains.
+ * @skb: Message holding data
+ *
+ * Record the consumption of a data message and generate an ACK if appropriate.
+ * The call state is shifted if this was the final packet.  The caller must be
+ * in process context with no spinlocks held.
+ *
+ * TODO: Actually generate the ACK here rather than punting this to the
+ * workqueue.
+ */
+void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+	_enter("%d,%p{%u}", call->debug_id, skb, sp->hdr.seq);
+
+	ASSERTCMP(sp->call, ==, call);
+	ASSERTCMP(sp->hdr.type, ==, RXRPC_PACKET_TYPE_DATA);
+
+	/* TODO: Fix the sequence number tracking */
+	ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
+	ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
+	ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
+
+	call->rx_data_recv = sp->hdr.seq;
+	rxrpc_hard_ACK_data(call, sp);
+}
+EXPORT_SYMBOL(rxrpc_kernel_data_consumed);
+
 /*
- * destroy a packet that has an RxRPC control buffer
- * - advance the hard-ACK state of the parent call (done here in case something
- *   in the kernel bypasses recvmsg() and steals the packet directly off of the
- *   socket receive queue)
+ * Destroy a packet that has an RxRPC control buffer
  */
 void rxrpc_packet_destructor(struct sk_buff *skb)
 {
@@ -112,9 +140,8 @@ void rxrpc_packet_destructor(struct sk_buff *skb)
 	_enter("%p{%p}", skb, call);
 
 	if (call) {
-		/* send the final ACK on a client call */
-		if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
-			rxrpc_hard_ACK_data(call, sp);
+		if (atomic_dec_return(&call->skb_count) < 0)
+			BUG();
 		rxrpc_put_call(call);
 		sp->call = NULL;
 	}
-- 
cgit v0.10.2


From 94d9f1c5906b20053efe375b6d66610bca4b8b64 Mon Sep 17 00:00:00 2001
From: David Forster <dforster@brocade.com>
Date: Wed, 3 Aug 2016 15:13:01 +0100
Subject: ipv4: panic in leaf_walk_rcu due to stale node pointer

Panic occurs when issuing "cat /proc/net/route" whilst
populating FIB with > 1M routes.

Use of cached node pointer in fib_route_get_idx is unsafe.

 BUG: unable to handle kernel paging request at ffffc90001630024
 IP: [<ffffffff814cf6a0>] leaf_walk_rcu+0x10/0xe0
 PGD 11b08d067 PUD 11b08e067 PMD dac4b067 PTE 0
 Oops: 0000 [#1] SMP
 Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscac
 snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep virti
 acpi_cpufreq button parport_pc ppdev lp parport autofs4 ext4 crc16 mbcache jbd
tio_ring virtio floppy uhci_hcd ehci_hcd usbcore usb_common libata scsi_mod
 CPU: 1 PID: 785 Comm: cat Not tainted 4.2.0-rc8+ #4
 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
 task: ffff8800da1c0bc0 ti: ffff88011a05c000 task.ti: ffff88011a05c000
 RIP: 0010:[<ffffffff814cf6a0>]  [<ffffffff814cf6a0>] leaf_walk_rcu+0x10/0xe0
 RSP: 0018:ffff88011a05fda0  EFLAGS: 00010202
 RAX: ffff8800d8a40c00 RBX: ffff8800da4af940 RCX: ffff88011a05ff20
 RDX: ffffc90001630020 RSI: 0000000001013531 RDI: ffff8800da4af950
 RBP: 0000000000000000 R08: ffff8800da1f9a00 R09: 0000000000000000
 R10: ffff8800db45b7e4 R11: 0000000000000246 R12: ffff8800da4af950
 R13: ffff8800d97a74c0 R14: 0000000000000000 R15: ffff8800d97a7480
 FS:  00007fd3970e0700(0000) GS:ffff88011fd00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
 CR2: ffffc90001630024 CR3: 000000011a7e4000 CR4: 00000000000006e0
 Stack:
  ffffffff814d00d3 0000000000000000 ffff88011a05ff20 ffff8800da1f9a00
  ffffffff811dd8b9 0000000000000800 0000000000020000 00007fd396f35000
  ffffffff811f8714 0000000000003431 ffffffff8138dce0 0000000000000f80
 Call Trace:
  [<ffffffff814d00d3>] ? fib_route_seq_start+0x93/0xc0
  [<ffffffff811dd8b9>] ? seq_read+0x149/0x380
  [<ffffffff811f8714>] ? fsnotify+0x3b4/0x500
  [<ffffffff8138dce0>] ? process_echoes+0x70/0x70
  [<ffffffff8121cfa7>] ? proc_reg_read+0x47/0x70
  [<ffffffff811bb823>] ? __vfs_read+0x23/0xd0
  [<ffffffff811bbd42>] ? rw_verify_area+0x52/0xf0
  [<ffffffff811bbe61>] ? vfs_read+0x81/0x120
  [<ffffffff811bcbc2>] ? SyS_read+0x42/0xa0
  [<ffffffff81549ab2>] ? entry_SYSCALL_64_fastpath+0x16/0x75
 Code: 48 85 c0 75 d8 f3 c3 31 c0 c3 f3 c3 66 66 66 66 66 66 2e 0f 1f 84 00 00
a 04 89 f0 33 02 44 89 c9 48 d3 e8 0f b6 4a 05 49 89
 RIP  [<ffffffff814cf6a0>] leaf_walk_rcu+0x10/0xe0
  RSP <ffff88011a05fda0>
 CR2: ffffc90001630024

Signed-off-by: Dave Forster <dforster@brocade.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index d07fc07..febca0f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2452,9 +2452,7 @@ struct fib_route_iter {
 static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
 					    loff_t pos)
 {
-	struct fib_table *tb = iter->main_tb;
 	struct key_vector *l, **tp = &iter->tnode;
-	struct trie *t;
 	t_key key;
 
 	/* use cache location of next-to-find key */
@@ -2462,8 +2460,6 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
 		pos -= iter->pos;
 		key = iter->key;
 	} else {
-		t = (struct trie *)tb->tb_data;
-		iter->tnode = t->kv;
 		iter->pos = 0;
 		key = 0;
 	}
@@ -2504,12 +2500,12 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
 		return NULL;
 
 	iter->main_tb = tb;
+	t = (struct trie *)tb->tb_data;
+	iter->tnode = t->kv;
 
 	if (*pos != 0)
 		return fib_route_get_idx(iter, *pos);
 
-	t = (struct trie *)tb->tb_data;
-	iter->tnode = t->kv;
 	iter->pos = 0;
 	iter->key = 0;
 
-- 
cgit v0.10.2


From 5e3b724e2767fb6495df1dcccaf7c79585c78ae9 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 3 Aug 2016 21:42:18 +0200
Subject: net: dsa: b53: Add missing ULL suffix for 64-bit constant
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On 32-bit (e.g. with m68k-linux-gnu-gcc-4.1):

    drivers/net/dsa/b53/b53_common.c: In function ‘b53_arl_read’:
    drivers/net/dsa/b53/b53_common.c:1072: warning: integer constant is too large for ‘long’ type

Fixes: 1da6df85c6fbed8f ("net: dsa: b53: Implement ARL add/del/dump operations")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h
index 8f12bdd..a0b453e 100644
--- a/drivers/net/dsa/b53/b53_regs.h
+++ b/drivers/net/dsa/b53/b53_regs.h
@@ -258,7 +258,7 @@
  * BCM5325 and BCM5365 share most definitions below
  */
 #define B53_ARLTBL_MAC_VID_ENTRY(n)	(0x10 * (n))
-#define   ARLTBL_MAC_MASK		0xffffffffffff
+#define   ARLTBL_MAC_MASK		0xffffffffffffULL
 #define   ARLTBL_VID_S			48
 #define   ARLTBL_VID_MASK_25		0xff
 #define   ARLTBL_VID_MASK		0xfff
-- 
cgit v0.10.2


From a6ed3ea65d9868fdf9eff84e6fe4f666b8d14b02 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Fri, 5 Aug 2016 14:01:27 -0700
Subject: bpf: restore behavior of bpf_map_update_elem

The introduction of pre-allocated hash elements inadvertently broke
the behavior of bpf hash maps where users expected to call
bpf_map_update_elem() without considering that the map can be full.
Some programs do:
old_value = bpf_map_lookup_elem(map, key);
if (old_value) {
  ... prepare new_value on stack ...
  bpf_map_update_elem(map, key, new_value);
}
Before pre-alloc the update() for existing element would work even
in 'map full' condition. Restore this behavior.

The above program could have updated old_value in place instead of
update() which would be faster and most programs use that approach,
but sometimes the values are large and the programs use update()
helper to do atomic replacement of the element.
Note we cannot simply update element's value in-place like percpu
hash map does and have to allocate extra num_possible_cpu elements
and use this extra reserve when the map is full.

Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index fff3650..570eeca 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -26,11 +26,18 @@ struct bpf_htab {
 	struct bucket *buckets;
 	void *elems;
 	struct pcpu_freelist freelist;
+	void __percpu *extra_elems;
 	atomic_t count;	/* number of elements in this hashtable */
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 };
 
+enum extra_elem_state {
+	HTAB_NOT_AN_EXTRA_ELEM = 0,
+	HTAB_EXTRA_ELEM_FREE,
+	HTAB_EXTRA_ELEM_USED
+};
+
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
@@ -38,7 +45,10 @@ struct htab_elem {
 		struct bpf_htab *htab;
 		struct pcpu_freelist_node fnode;
 	};
-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		enum extra_elem_state state;
+	};
 	u32 hash;
 	char key[0] __aligned(8);
 };
@@ -113,6 +123,23 @@ free_elems:
 	return err;
 }
 
+static int alloc_extra_elems(struct bpf_htab *htab)
+{
+	void __percpu *pptr;
+	int cpu;
+
+	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+	if (!pptr)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
+			HTAB_EXTRA_ELEM_FREE;
+	}
+	htab->extra_elems = pptr;
+	return 0;
+}
+
 /* Called from syscall */
 static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 {
@@ -185,6 +212,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (percpu)
 		cost += (u64) round_up(htab->map.value_size, 8) *
 			num_possible_cpus() * htab->map.max_entries;
+	else
+	       cost += (u64) htab->elem_size * num_possible_cpus();
 
 	if (cost >= U32_MAX - PAGE_SIZE)
 		/* make sure page count doesn't overflow */
@@ -212,14 +241,22 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}
 
+	if (!percpu) {
+		err = alloc_extra_elems(htab);
+		if (err)
+			goto free_buckets;
+	}
+
 	if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
 		err = prealloc_elems_and_freelist(htab);
 		if (err)
-			goto free_buckets;
+			goto free_extra_elems;
 	}
 
 	return &htab->map;
 
+free_extra_elems:
+	free_percpu(htab->extra_elems);
 free_buckets:
 	kvfree(htab->buckets);
 free_htab:
@@ -349,7 +386,6 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
 	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
 		free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
 	kfree(l);
-
 }
 
 static void htab_elem_free_rcu(struct rcu_head *head)
@@ -370,6 +406,11 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
+	if (l->state == HTAB_EXTRA_ELEM_USED) {
+		l->state = HTAB_EXTRA_ELEM_FREE;
+		return;
+	}
+
 	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
 		pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
@@ -381,25 +422,44 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
-					 bool percpu, bool onallcpus)
+					 bool percpu, bool onallcpus,
+					 bool old_elem_exists)
 {
 	u32 size = htab->map.value_size;
 	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
 	struct htab_elem *l_new;
 	void __percpu *pptr;
+	int err = 0;
 
 	if (prealloc) {
 		l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
 		if (!l_new)
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
 	} else {
 		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
 			atomic_dec(&htab->count);
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
+		} else {
+			l_new = kmalloc(htab->elem_size,
+					GFP_ATOMIC | __GFP_NOWARN);
+			if (!l_new)
+				return ERR_PTR(-ENOMEM);
 		}
-		l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
-		if (!l_new)
-			return ERR_PTR(-ENOMEM);
+	}
+
+	if (err) {
+		if (!old_elem_exists)
+			return ERR_PTR(err);
+
+		/* if we're updating the existing element and the hash table
+		 * is full, use per-cpu extra elems
+		 */
+		l_new = this_cpu_ptr(htab->extra_elems);
+		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
+			return ERR_PTR(-E2BIG);
+		l_new->state = HTAB_EXTRA_ELEM_USED;
+	} else {
+		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
 	}
 
 	memcpy(l_new->key, key, key_size);
@@ -489,7 +549,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (ret)
 		goto err;
 
-	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
+	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
+				!!l_old);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -563,7 +624,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 		}
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, true, onallcpus);
+					hash, true, onallcpus, false);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -652,6 +713,7 @@ static void htab_map_free(struct bpf_map *map)
 		htab_free_elems(htab);
 		pcpu_freelist_destroy(&htab->freelist);
 	}
+	free_percpu(htab->extra_elems);
 	kvfree(htab->buckets);
 	kfree(htab);
 }
-- 
cgit v0.10.2


From ba0cc3c153590e3d31985b8f8914d205a20b0d7a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Fri, 5 Aug 2016 14:01:28 -0700
Subject: samples/bpf: add bpf_map_update_elem() tests

increase test coverage to check previously missing 'update when full'

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c
index 47bf085..cce2b59 100644
--- a/samples/bpf/test_maps.c
+++ b/samples/bpf/test_maps.c
@@ -68,7 +68,16 @@ static void test_hashmap_sanity(int i, void *data)
 	assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
 	       errno == E2BIG);
 
+	/* update existing element, thought the map is full */
+	key = 1;
+	assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0);
+	key = 2;
+	assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
+	key = 1;
+	assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
+
 	/* check that key = 0 doesn't exist */
+	key = 0;
 	assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
 
 	/* iterate over two elements */
@@ -413,10 +422,12 @@ static void do_work(int fn, void *data)
 
 	for (i = fn; i < MAP_SIZE; i += TASKS) {
 		key = value = i;
-		if (do_update)
+		if (do_update) {
 			assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0);
-		else
+			assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0);
+		} else {
 			assert(bpf_delete_elem(map_fd, &key) == 0);
+		}
 	}
 }
 
-- 
cgit v0.10.2


From c518189567eaf42b2ec50a4d982484c8e38799f8 Mon Sep 17 00:00:00 2001
From: Harini Katakam <harini.katakam@xilinx.com>
Date: Fri, 5 Aug 2016 10:31:58 +0530
Subject: net: macb: Correct CAPS mask

USRIO and JUMBO CAPS have the same mask.
Fix the same.

Fixes: ce721a702197 ("net: ethernet: cadence-macb: Add disabled usrio caps")
Cc: stable@vger.kernel.org # v4.5+
Signed-off-by: Harini Katakam <harinik@xilinx.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 36893d8..b6fcf10 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -403,11 +403,11 @@
 #define MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII	0x00000004
 #define MACB_CAPS_NO_GIGABIT_HALF		0x00000008
 #define MACB_CAPS_USRIO_DISABLED		0x00000010
+#define MACB_CAPS_JUMBO				0x00000020
 #define MACB_CAPS_FIFO_MODE			0x10000000
 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE	0x20000000
 #define MACB_CAPS_SG_DISABLED			0x40000000
 #define MACB_CAPS_MACB_IS_GEM			0x80000000
-#define MACB_CAPS_JUMBO				0x00000010
 
 /* Bit manipulation macros */
 #define MACB_BIT(name)					\
-- 
cgit v0.10.2


From 707e6835f89419ff1c05e828c2d9939fd95a7ad8 Mon Sep 17 00:00:00 2001
From: Liping Zhang <liping.zhang@spreadtrum.com>
Date: Sat, 23 Jul 2016 22:16:56 +0800
Subject: netfilter: nf_ct_h323: do not re-activate already expired timer

Commit 96d1327ac2e3 ("netfilter: h323: Use mod_timer instead of
set_expect_timeout") just simplify the source codes
    if (!del_timer(&exp->timeout))
        return 0;
    add_timer(&exp->timeout);
to mod_timer(&exp->timeout, jiffies + info->timeout * HZ);

This is not correct, and introduce a race codition:
    CPU0                     CPU1
     -                     timer expire
  process_rcf              expectation_timed_out
  lock(exp_lock)              -
  find_exp                 waiting exp_lock...
  re-activate timer!!      waiting exp_lock...
  unlock(exp_lock)         lock(exp_lock)
     -                     unlink expect
     -                     free(expect)
     -                     unlock(exp_lock)
So when the timer expires again, we will access the memory that
was already freed.

Replace mod_timer with mod_timer_pending here to fix this problem.

Fixes: 96d1327ac2e3 ("netfilter: h323: Use mod_timer instead of set_expect_timeout")
Cc: Gao Feng <fgao@ikuai8.com>
Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index bb77a97..5c0db5c 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1473,7 +1473,8 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
 				 "timeout to %u seconds for",
 				 info->timeout);
 			nf_ct_dump_tuple(&exp->tuple);
-			mod_timer(&exp->timeout, jiffies + info->timeout * HZ);
+			mod_timer_pending(&exp->timeout,
+					  jiffies + info->timeout * HZ);
 		}
 		spin_unlock_bh(&nf_conntrack_expect_lock);
 	}
-- 
cgit v0.10.2


From 2c86943c20e375b0fe562af0626f2e5461d8d203 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 24 Jul 2016 19:53:19 +0200
Subject: netfilter: nf_tables: s/MFT_REG32_01/NFT_REG32_01

MFT_REG32_01 is a typo, rename this to NFT_REG32_01.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 01751fa..c674ba2 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -24,7 +24,7 @@ enum nft_registers {
 	__NFT_REG_MAX,
 
 	NFT_REG32_00	= 8,
-	MFT_REG32_01,
+	NFT_REG32_01,
 	NFT_REG32_02,
 	NFT_REG32_03,
 	NFT_REG32_04,
-- 
cgit v0.10.2


From c1eda3c6394f805886b2afa8c7ea5e04305ec698 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 1 Aug 2016 13:13:08 +0200
Subject: netfilter: nft_rbtree: ignore inactive matching element with no
 descendants

If we find a matching element that is inactive with no descendants, we
jump to the found label, then crash because of nul-dereference on the
left branch.

Fix this by checking that the element is active and not an interval end
and skipping the logic that only applies to the tree iteration.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Tested-by: Anders K. Pedersen <akp@akp.dk>

diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index 6473936..ffe9ae0 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -70,7 +70,6 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
 		} else if (d > 0)
 			parent = parent->rb_right;
 		else {
-found:
 			if (!nft_set_elem_active(&rbe->ext, genmask)) {
 				parent = parent->rb_left;
 				continue;
@@ -84,9 +83,12 @@ found:
 		}
 	}
 
-	if (set->flags & NFT_SET_INTERVAL && interval != NULL) {
-		rbe = interval;
-		goto found;
+	if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+	    nft_set_elem_active(&interval->ext, genmask) &&
+	    !nft_rbtree_interval_end(interval)) {
+		spin_unlock_bh(&nft_rbtree_lock);
+		*ext = &interval->ext;
+		return true;
 	}
 out:
 	spin_unlock_bh(&nft_rbtree_lock);
-- 
cgit v0.10.2


From 0d35d0815b19216f852f257afe420f7c7d182780 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Wed, 3 Aug 2016 12:41:40 +0200
Subject: netfilter: nf_conntrack_sip: CSeq 0 is a valid CSeq

Do not drop packet when CSeq is 0 as 0 is also a valid value for CSeq.

simple_strtoul() will return 0 either when all digits are 0
or if there are no digits at all. Therefore when simple_strtoul()
returns 0 we check if first character is digit 0 or not.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 8d9db9d..7d77217 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1383,7 +1383,7 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff,
 		return NF_DROP;
 	}
 	cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
-	if (!cseq) {
+	if (!cseq && *(*dptr + matchoff) != '0') {
 		nf_ct_helper_log(skb, ct, "cannot get cseq");
 		return NF_DROP;
 	}
@@ -1446,7 +1446,7 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff,
 			return NF_DROP;
 		}
 		cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
-		if (!cseq) {
+		if (!cseq && *(*dptr + matchoff) != '0') {
 			nf_ct_helper_log(skb, ct, "cannot get cseq");
 			return NF_DROP;
 		}
-- 
cgit v0.10.2


From dca3f53c02e325bb19dfc05b1571b9a706226fea Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 4 Aug 2016 12:11:55 +0200
Subject: sctp: Export struct sctp_info to userspace

This is required to correctly interpret INET_DIAG_INFO messages exported
by sctp_diag module.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index de1f643..fcb4c36 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -705,70 +705,6 @@ typedef struct sctp_auth_chunk {
 	sctp_authhdr_t auth_hdr;
 } __packed sctp_auth_chunk_t;
 
-struct sctp_info {
-	__u32	sctpi_tag;
-	__u32	sctpi_state;
-	__u32	sctpi_rwnd;
-	__u16	sctpi_unackdata;
-	__u16	sctpi_penddata;
-	__u16	sctpi_instrms;
-	__u16	sctpi_outstrms;
-	__u32	sctpi_fragmentation_point;
-	__u32	sctpi_inqueue;
-	__u32	sctpi_outqueue;
-	__u32	sctpi_overall_error;
-	__u32	sctpi_max_burst;
-	__u32	sctpi_maxseg;
-	__u32	sctpi_peer_rwnd;
-	__u32	sctpi_peer_tag;
-	__u8	sctpi_peer_capable;
-	__u8	sctpi_peer_sack;
-	__u16	__reserved1;
-
-	/* assoc status info */
-	__u64	sctpi_isacks;
-	__u64	sctpi_osacks;
-	__u64	sctpi_opackets;
-	__u64	sctpi_ipackets;
-	__u64	sctpi_rtxchunks;
-	__u64	sctpi_outofseqtsns;
-	__u64	sctpi_idupchunks;
-	__u64	sctpi_gapcnt;
-	__u64	sctpi_ouodchunks;
-	__u64	sctpi_iuodchunks;
-	__u64	sctpi_oodchunks;
-	__u64	sctpi_iodchunks;
-	__u64	sctpi_octrlchunks;
-	__u64	sctpi_ictrlchunks;
-
-	/* primary transport info */
-	struct sockaddr_storage	sctpi_p_address;
-	__s32	sctpi_p_state;
-	__u32	sctpi_p_cwnd;
-	__u32	sctpi_p_srtt;
-	__u32	sctpi_p_rto;
-	__u32	sctpi_p_hbinterval;
-	__u32	sctpi_p_pathmaxrxt;
-	__u32	sctpi_p_sackdelay;
-	__u32	sctpi_p_sackfreq;
-	__u32	sctpi_p_ssthresh;
-	__u32	sctpi_p_partial_bytes_acked;
-	__u32	sctpi_p_flight_size;
-	__u16	sctpi_p_error;
-	__u16	__reserved2;
-
-	/* sctp sock info */
-	__u32	sctpi_s_autoclose;
-	__u32	sctpi_s_adaptation_ind;
-	__u32	sctpi_s_pd_point;
-	__u8	sctpi_s_nodelay;
-	__u8	sctpi_s_disable_fragments;
-	__u8	sctpi_s_v4mapped;
-	__u8	sctpi_s_frag_interleave;
-	__u32	sctpi_s_type;
-	__u32	__reserved3;
-};
-
 struct sctp_infox {
 	struct sctp_info *sctpinfo;
 	struct sctp_association *asoc;
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index d304f4c..a406adc 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -944,4 +944,68 @@ struct sctp_default_prinfo {
 	__u16 pr_policy;
 };
 
+struct sctp_info {
+	__u32	sctpi_tag;
+	__u32	sctpi_state;
+	__u32	sctpi_rwnd;
+	__u16	sctpi_unackdata;
+	__u16	sctpi_penddata;
+	__u16	sctpi_instrms;
+	__u16	sctpi_outstrms;
+	__u32	sctpi_fragmentation_point;
+	__u32	sctpi_inqueue;
+	__u32	sctpi_outqueue;
+	__u32	sctpi_overall_error;
+	__u32	sctpi_max_burst;
+	__u32	sctpi_maxseg;
+	__u32	sctpi_peer_rwnd;
+	__u32	sctpi_peer_tag;
+	__u8	sctpi_peer_capable;
+	__u8	sctpi_peer_sack;
+	__u16	__reserved1;
+
+	/* assoc status info */
+	__u64	sctpi_isacks;
+	__u64	sctpi_osacks;
+	__u64	sctpi_opackets;
+	__u64	sctpi_ipackets;
+	__u64	sctpi_rtxchunks;
+	__u64	sctpi_outofseqtsns;
+	__u64	sctpi_idupchunks;
+	__u64	sctpi_gapcnt;
+	__u64	sctpi_ouodchunks;
+	__u64	sctpi_iuodchunks;
+	__u64	sctpi_oodchunks;
+	__u64	sctpi_iodchunks;
+	__u64	sctpi_octrlchunks;
+	__u64	sctpi_ictrlchunks;
+
+	/* primary transport info */
+	struct sockaddr_storage	sctpi_p_address;
+	__s32	sctpi_p_state;
+	__u32	sctpi_p_cwnd;
+	__u32	sctpi_p_srtt;
+	__u32	sctpi_p_rto;
+	__u32	sctpi_p_hbinterval;
+	__u32	sctpi_p_pathmaxrxt;
+	__u32	sctpi_p_sackdelay;
+	__u32	sctpi_p_sackfreq;
+	__u32	sctpi_p_ssthresh;
+	__u32	sctpi_p_partial_bytes_acked;
+	__u32	sctpi_p_flight_size;
+	__u16	sctpi_p_error;
+	__u16	__reserved2;
+
+	/* sctp sock info */
+	__u32	sctpi_s_autoclose;
+	__u32	sctpi_s_adaptation_ind;
+	__u32	sctpi_s_pd_point;
+	__u8	sctpi_s_nodelay;
+	__u8	sctpi_s_disable_fragments;
+	__u8	sctpi_s_v4mapped;
+	__u8	sctpi_s_frag_interleave;
+	__u32	sctpi_s_type;
+	__u32	__reserved3;
+};
+
 #endif /* _UAPI_SCTP_H */
-- 
cgit v0.10.2


From 12474e8e58d82e2b815cd35956c0c06fab104ee7 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 4 Aug 2016 12:11:56 +0200
Subject: sctp_diag: Fix T3_rtx timer export

The asoc's timer value is not kept in asoc->timeouts array but in it's
primary transport instead.

Furthermore, we must export the timer only if it is pending, otherwise
the value will underrun when stored in an unsigned variable and
user space will only see a very large timeout value.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index f69edcf..f728ef0 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -13,6 +13,7 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
 {
 	union sctp_addr laddr, paddr;
 	struct dst_entry *dst;
+	struct timer_list *t3_rtx = &asoc->peer.primary_path->T3_rtx_timer;
 
 	laddr = list_entry(asoc->base.bind_addr.address_list.next,
 			   struct sctp_sockaddr_entry, list)->a;
@@ -40,10 +41,15 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
 	}
 
 	r->idiag_state = asoc->state;
-	r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
-	r->idiag_retrans = asoc->rtx_data_chunks;
-	r->idiag_expires = jiffies_to_msecs(
-		asoc->timeouts[SCTP_EVENT_TIMEOUT_T3_RTX] - jiffies);
+	if (timer_pending(t3_rtx)) {
+		r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
+		r->idiag_retrans = asoc->rtx_data_chunks;
+		r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies);
+	} else {
+		r->idiag_timer = 0;
+		r->idiag_retrans = 0;
+		r->idiag_expires = 0;
+	}
 }
 
 static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb,
-- 
cgit v0.10.2


From 1ba8d77f410d3b7423df39e8a10a02af272d3c42 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 4 Aug 2016 12:11:57 +0200
Subject: sctp_diag: Respect ss adding TCPF_CLOSE to idiag_states

Since 'ss' always adds TCPF_CLOSE to idiag_states flags, sctp_diag can't
rely upon TCPF_LISTEN flag solely being present when listening sockets
are requested.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index f728ef0..bb69153 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -356,7 +356,7 @@ static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
 	if (cb->args[4] < cb->args[1])
 		goto next;
 
-	if ((r->idiag_states & ~TCPF_LISTEN) && !list_empty(&ep->asocs))
+	if (!(r->idiag_states & TCPF_LISTEN) && !list_empty(&ep->asocs))
 		goto next;
 
 	if (r->sdiag_family != AF_UNSPEC &&
@@ -471,7 +471,7 @@ skip:
 	 * 3 : to mark if we have dumped the ep info of the current asoc
 	 * 4 : to work as a temporary variable to traversal list
 	 */
-	if (!(idiag_states & ~TCPF_LISTEN))
+	if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
 		goto done;
 	sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp);
 done:
-- 
cgit v0.10.2


From 3b3bf80b994f0c6c35a25ef8965ab956b4bcced5 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 4 Aug 2016 12:37:17 +0200
Subject: rhashtable-test: Fix max_size parameter description

Looks like a simple copy'n'paste error.

Fixes: 1aa661f5c3df1 ("rhashtable-test: Measure time to insert, remove & traverse entries")
Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 297fdb5..64e899b 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -38,7 +38,7 @@ MODULE_PARM_DESC(runs, "Number of test runs per variant (default: 4)");
 
 static int max_size = 0;
 module_param(max_size, int, 0);
-MODULE_PARM_DESC(runs, "Maximum table size (default: calculated)");
+MODULE_PARM_DESC(max_size, "Maximum table size (default: calculated)");
 
 static bool shrinking = false;
 module_param(shrinking, bool, 0);
-- 
cgit v0.10.2


From b489a2000f19e414710d2887fe3e24e903242766 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 4 Aug 2016 17:36:20 +0300
Subject: mlxsw: spectrum: Do not assume PAUSE frames are disabled

When ieee_setpfc() gets called, PAUSE frames are not necessarily
disabled on the port.

Check if PAUSE frames are disabled or enabled and configure the port's
headroom buffer accordingly.

Fixes: d81a6bdb87ce ("mlxsw: spectrum: Add IEEE 802.1Qbb PFC support")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
index 01cfb75..3c4a178 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
@@ -351,17 +351,17 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev,
 				      struct ieee_pfc *pfc)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+	bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port);
 	int err;
 
-	if ((mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) &&
-	    pfc->pfc_en) {
+	if (pause_en && pfc->pfc_en) {
 		netdev_err(dev, "PAUSE frames already enabled on port\n");
 		return -EINVAL;
 	}
 
 	err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
 					   mlxsw_sp_port->dcb.ets->prio_tc,
-					   false, pfc);
+					   pause_en, pfc);
 	if (err) {
 		netdev_err(dev, "Failed to configure port's headroom for PFC\n");
 		return err;
@@ -380,7 +380,7 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev,
 
 err_port_pfc_set:
 	__mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
-				     mlxsw_sp_port->dcb.ets->prio_tc, false,
+				     mlxsw_sp_port->dcb.ets->prio_tc, pause_en,
 				     mlxsw_sp_port->dcb.pfc);
 	return err;
 }
-- 
cgit v0.10.2


From 07d50cae0661e5479d54d6e3e21cad15b1198103 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 4 Aug 2016 17:36:21 +0300
Subject: mlxsw: spectrum: Do not override PAUSE settings

The PFCC register is used to configure both PAUSE and PFC frames.
Therefore, when PFC frames are disabled we must make sure we don't
mistakenly also disable PAUSE frames (which might be enabled).

Fix this by packing the PFCC register with the current PAUSE settings.

Note that this register is also accessed via ethtool ops, but there we
are guaranteed to have PFC disabled.

Fixes: d81a6bdb87ce ("mlxsw: spectrum: Add IEEE 802.1Qbb PFC support")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
index 3c4a178..b6ed7f7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
@@ -341,6 +341,8 @@ static int mlxsw_sp_port_pfc_set(struct mlxsw_sp_port *mlxsw_sp_port,
 	char pfcc_pl[MLXSW_REG_PFCC_LEN];
 
 	mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port);
+	mlxsw_reg_pfcc_pprx_set(pfcc_pl, mlxsw_sp_port->link.rx_pause);
+	mlxsw_reg_pfcc_pptx_set(pfcc_pl, mlxsw_sp_port->link.tx_pause);
 	mlxsw_reg_pfcc_prio_pack(pfcc_pl, pfc->pfc_en);
 
 	return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc),
-- 
cgit v0.10.2


From 4de34eb5743f720dc4798f0647f75c21d44aa1f8 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 4 Aug 2016 17:36:22 +0300
Subject: mlxsw: spectrum: Add missing DCB rollback in error path

We correctly execute mlxsw_sp_port_dcb_fini() when port is removed, but
I missed its rollback in the error path of port creation, so add it.

Fixes: f00817df2b42 ("mlxsw: spectrum: Introduce support for Data Center Bridging (DCB)")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index c3e6150..e1b8f62 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2220,6 +2220,7 @@ err_port_vlan_init:
 err_core_port_init:
 	unregister_netdev(dev);
 err_register_netdev:
+	mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
 err_port_dcb_init:
 err_port_ets_init:
 err_port_buffers_init:
-- 
cgit v0.10.2


From 66cf3504f4c528793b3d8986bab606f7cfb1c4bb Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Thu, 4 Aug 2016 16:07:58 -0400
Subject: net/ethernet: tundra: fix dump_eth_one warning in tsi108_eth

The call site for this function appears as:

  #ifdef DEBUG
        data->msg_enable = DEBUG;
        dump_eth_one(dev);
  #endif

...leading to the following warning for !DEBUG builds:

drivers/net/ethernet/tundra/tsi108_eth.c:169:13: warning: 'dump_eth_one' defined but not used [-Wunused-function]
 static void dump_eth_one(struct net_device *dev)
             ^

...when using the arch/powerpc/configs/mpc7448_hpc2_defconfig

Put the function definition under the same #ifdef as the call site
to avoid the warning.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
index 01a7714..8fd1312 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c
@@ -166,6 +166,7 @@ static struct platform_driver tsi_eth_driver = {
 
 static void tsi108_timed_checker(unsigned long dev_ptr);
 
+#ifdef DEBUG
 static void dump_eth_one(struct net_device *dev)
 {
 	struct tsi108_prv_data *data = netdev_priv(dev);
@@ -190,6 +191,7 @@ static void dump_eth_one(struct net_device *dev)
 	       TSI_READ(TSI108_EC_RXESTAT),
 	       TSI_READ(TSI108_EC_RXERR), data->rxpending);
 }
+#endif
 
 /* Synchronization is needed between the thread and up/down events.
  * Note that the PHY is accessed through the same registers for both
-- 
cgit v0.10.2


From a2bfe6bf09a5f38df2bd0b1734f5fdee76f9366f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 5 Aug 2016 00:11:11 +0200
Subject: bpf: also call skb_postpush_rcsum on xmit occasions

Follow-up to commit f8ffad69c9f8 ("bpf: add skb_postpush_rcsum and fix
dev_forward_skb occasions") to fix an issue for dev_queue_xmit() redirect
locations which need CHECKSUM_COMPLETE fixups on ingress.

For the same reasons as described in f8ffad69c9f8 already, we of course
also need this here, since dev_queue_xmit() on a veth device will let us
end up in the dev_forward_skb() helper again to cross namespaces.

Latter then calls into skb_postpull_rcsum() to pull out L2 header, so
that netif_rx_internal() sees CHECKSUM_COMPLETE as it is expected. That
is, CHECKSUM_COMPLETE on ingress covering L2 _payload_, not L2 headers.

Also here we have to address bpf_redirect() and bpf_clone_redirect().

Fixes: 3896d655f4d4 ("bpf: introduce bpf_clone_redirect() helper")
Fixes: 27b29f63058d ("bpf: add bpf_redirect() helper")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/filter.c b/net/core/filter.c
index 5708999..c46244f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1365,6 +1365,12 @@ static inline int bpf_try_make_writable(struct sk_buff *skb,
 	return err;
 }
 
+static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
+{
+	if (skb_at_tc_ingress(skb))
+		skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
+}
+
 static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 {
 	struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
@@ -1607,9 +1613,6 @@ static const struct bpf_func_proto bpf_csum_diff_proto = {
 
 static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
 {
-	if (skb_at_tc_ingress(skb))
-		skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
-
 	return dev_forward_skb(dev, skb);
 }
 
@@ -1648,6 +1651,8 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
 	if (unlikely(!skb))
 		return -ENOMEM;
 
+	bpf_push_mac_rcsum(skb);
+
 	return flags & BPF_F_INGRESS ?
 	       __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
 }
@@ -1693,6 +1698,8 @@ int skb_do_redirect(struct sk_buff *skb)
 		return -EINVAL;
 	}
 
+	bpf_push_mac_rcsum(skb);
+
 	return ri->flags & BPF_F_INGRESS ?
 	       __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
 }
-- 
cgit v0.10.2


From 479ffcccefd7b04442b0e949f8779b0612e8c75f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 5 Aug 2016 00:11:12 +0200
Subject: bpf: fix checksum fixups on bpf_skb_store_bytes

bpf_skb_store_bytes() invocations above L2 header need BPF_F_RECOMPUTE_CSUM
flag for updates, so that CHECKSUM_COMPLETE will be fixed up along the way.
Where we ran into an issue with bpf_skb_store_bytes() is when we did a
single-byte update on the IPv6 hoplimit despite using BPF_F_RECOMPUTE_CSUM
flag; simple ping via ICMPv6 triggered a hw csum failure as a result. The
underlying issue has been tracked down to a buffer alignment issue.

Meaning, that csum_partial() computations via skb_postpull_rcsum() and
skb_postpush_rcsum() pair invoked had a wrong result since they operated on
an odd address for the hoplimit, while other computations were done on an
even address. This mix doesn't work as-is with skb_postpull_rcsum(),
skb_postpush_rcsum() pair as it always expects at least half-word alignment
of input buffers, which is normally the case. Thus, instead of these helpers
using csum_sub() and (implicitly) csum_add(), we need to use csum_block_sub(),
csum_block_add(), respectively. For unaligned offsets, they rotate the sum
to align it to a half-word boundary again, otherwise they work the same as
csum_sub() and csum_add().

Adding __skb_postpull_rcsum(), __skb_postpush_rcsum() variants that take the
offset as an input and adapting bpf_skb_store_bytes() to them fixes the hw
csum failures again. The skb_postpull_rcsum(), skb_postpush_rcsum() helpers
use a 0 constant for offset so that the compiler optimizes the offset & 1
test away and generates the same code as with csum_sub()/_add().

Fixes: 608cd71a9c7c ("tc: bpf: generalize pedit action")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6f0b3e0..0f665cb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2847,6 +2847,18 @@ static inline int skb_linearize_cow(struct sk_buff *skb)
 	       __skb_linearize(skb) : 0;
 }
 
+static __always_inline void
+__skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
+		     unsigned int off)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_block_sub(skb->csum,
+					   csum_partial(start, len, 0), off);
+	else if (skb->ip_summed == CHECKSUM_PARTIAL &&
+		 skb_checksum_start_offset(skb) < 0)
+		skb->ip_summed = CHECKSUM_NONE;
+}
+
 /**
  *	skb_postpull_rcsum - update checksum for received skb after pull
  *	@skb: buffer to update
@@ -2857,36 +2869,38 @@ static inline int skb_linearize_cow(struct sk_buff *skb)
  *	update the CHECKSUM_COMPLETE checksum, or set ip_summed to
  *	CHECKSUM_NONE so that it can be recomputed from scratch.
  */
-
 static inline void skb_postpull_rcsum(struct sk_buff *skb,
 				      const void *start, unsigned int len)
 {
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
-		skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
-	else if (skb->ip_summed == CHECKSUM_PARTIAL &&
-		 skb_checksum_start_offset(skb) < 0)
-		skb->ip_summed = CHECKSUM_NONE;
+	__skb_postpull_rcsum(skb, start, len, 0);
 }
 
-unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
+static __always_inline void
+__skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
+		     unsigned int off)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_block_add(skb->csum,
+					   csum_partial(start, len, 0), off);
+}
 
+/**
+ *	skb_postpush_rcsum - update checksum for received skb after push
+ *	@skb: buffer to update
+ *	@start: start of data after push
+ *	@len: length of data pushed
+ *
+ *	After doing a push on a received packet, you need to call this to
+ *	update the CHECKSUM_COMPLETE checksum.
+ */
 static inline void skb_postpush_rcsum(struct sk_buff *skb,
 				      const void *start, unsigned int len)
 {
-	/* For performing the reverse operation to skb_postpull_rcsum(),
-	 * we can instead of ...
-	 *
-	 *   skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
-	 *
-	 * ... just use this equivalent version here to save a few
-	 * instructions. Feeding csum of 0 in csum_partial() and later
-	 * on adding skb->csum is equivalent to feed skb->csum in the
-	 * first place.
-	 */
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
-		skb->csum = csum_partial(start, len, skb->csum);
+	__skb_postpush_rcsum(skb, start, len, 0);
 }
 
+unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
+
 /**
  *	skb_push_rcsum - push skb and update receive checksum
  *	@skb: buffer to update
diff --git a/net/core/filter.c b/net/core/filter.c
index c46244f..5ecd5c9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1401,7 +1401,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 		return -EFAULT;
 
 	if (flags & BPF_F_RECOMPUTE_CSUM)
-		skb_postpull_rcsum(skb, ptr, len);
+		__skb_postpull_rcsum(skb, ptr, len, offset);
 
 	memcpy(ptr, from, len);
 
@@ -1410,7 +1410,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 		skb_store_bits(skb, offset, ptr, len);
 
 	if (flags & BPF_F_RECOMPUTE_CSUM)
-		skb_postpush_rcsum(skb, ptr, len);
+		__skb_postpush_rcsum(skb, ptr, len, offset);
 	if (flags & BPF_F_INVALIDATE_HASH)
 		skb_clear_hash(skb);
 
-- 
cgit v0.10.2


From 8065694e6519d234ccee93d952127e3f05b81ba5 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 5 Aug 2016 00:11:13 +0200
Subject: bpf: fix checksum for vlan push/pop helper

When having skbs on ingress with CHECKSUM_COMPLETE, tc BPF programs don't
push rcsum of mac header back in and after BPF run back pull out again as
opposed to some other subsystems (ovs, for example).

For cases like q-in-q, meaning when a vlan tag for offloading is already
present and we're about to push another one, then skb_vlan_push() pushes the
inner one into the skb, increasing mac header and skb_postpush_rcsum()'ing
the 4 bytes vlan header diff. Likewise, for the reverse operation in
skb_vlan_pop() for the case where vlan header needs to be pulled out of the
skb, we're decreasing the mac header and skb_postpull_rcsum()'ing the 4 bytes
rcsum of the vlan header that was removed.

However mangling the rcsum here will lead to hw csum failure for BPF case,
since we're pulling or pushing data that was not part of the current rcsum.
Changing tc BPF programs in general to push/pull rcsum around BPF_PROG_RUN()
is also not really an option since current behaviour is ABI by now, but apart
from that would also mean to do quite a bit of useless work in the sense that
usually 12 bytes need to be rcsum pushed/pulled also when we don't need to
touch this vlan related corner case. One way to fix it would be to push the
necessary rcsum fixup down into vlan helpers that are (mostly) slow-path
anyway.

Fixes: 4e10df9a60d9 ("bpf: introduce bpf_skb_vlan_push/pop() helpers")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/filter.c b/net/core/filter.c
index 5ecd5c9..b5add4e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1371,6 +1371,12 @@ static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
 		skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
 }
 
+static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
+{
+	if (skb_at_tc_ingress(skb))
+		skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
+}
+
 static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 {
 	struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
@@ -1763,7 +1769,10 @@ static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
 		     vlan_proto != htons(ETH_P_8021AD)))
 		vlan_proto = htons(ETH_P_8021Q);
 
+	bpf_push_mac_rcsum(skb);
 	ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
+	bpf_pull_mac_rcsum(skb);
+
 	bpf_compute_data_end(skb);
 	return ret;
 }
@@ -1783,7 +1792,10 @@ static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
 	int ret;
 
+	bpf_push_mac_rcsum(skb);
 	ret = skb_vlan_pop(skb);
+	bpf_pull_mac_rcsum(skb);
+
 	bpf_compute_data_end(skb);
 	return ret;
 }
-- 
cgit v0.10.2


From 272d96a5ab10662691b4ec90c4a66fdbf30ea7ba Mon Sep 17 00:00:00 2001
From: pravin shelar <pshelar@ovn.org>
Date: Fri, 5 Aug 2016 17:45:36 -0700
Subject: net: vxlan: lwt: Use source ip address during route lookup.

LWT user can specify destination as well as source ip address
for given tunnel endpoint. But vxlan is ignoring given source
ip address. Following patch uses both ip address to route the
tunnel packet. This consistent with other LWT implementations,
like GENEVE and GRE.

Fixes: ee122c79d42 ("vxlan: Flow based tunneling").
Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index da4e3d6..b812234 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1811,7 +1811,7 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
 	fl4.flowi4_mark = skb->mark;
 	fl4.flowi4_proto = IPPROTO_UDP;
 	fl4.daddr = daddr;
-	fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;
+	fl4.saddr = *saddr;
 
 	rt = ip_route_output_key(vxlan->net, &fl4);
 	if (!IS_ERR(rt)) {
@@ -1847,7 +1847,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_oif = oif;
 	fl6.daddr = *daddr;
-	fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
+	fl6.saddr = *saddr;
 	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
 	fl6.flowi6_mark = skb->mark;
 	fl6.flowi6_proto = IPPROTO_UDP;
@@ -1920,7 +1920,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	struct rtable *rt = NULL;
 	const struct iphdr *old_iph;
 	union vxlan_addr *dst;
-	union vxlan_addr remote_ip;
+	union vxlan_addr remote_ip, local_ip;
+	union vxlan_addr *src;
 	struct vxlan_metadata _md;
 	struct vxlan_metadata *md = &_md;
 	__be16 src_port = 0, dst_port;
@@ -1938,6 +1939,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
 		vni = rdst->remote_vni;
 		dst = &rdst->remote_ip;
+		src = &vxlan->cfg.saddr;
 		dst_cache = &rdst->dst_cache;
 	} else {
 		if (!info) {
@@ -1948,11 +1950,15 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
 		vni = vxlan_tun_id_to_vni(info->key.tun_id);
 		remote_ip.sa.sa_family = ip_tunnel_info_af(info);
-		if (remote_ip.sa.sa_family == AF_INET)
+		if (remote_ip.sa.sa_family == AF_INET) {
 			remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
-		else
+			local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src;
+		} else {
 			remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
+			local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
+		}
 		dst = &remote_ip;
+		src = &local_ip;
 		dst_cache = &info->dst_cache;
 	}
 
@@ -1992,15 +1998,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	if (dst->sa.sa_family == AF_INET) {
-		__be32 saddr;
-
 		if (!vxlan->vn4_sock)
 			goto drop;
 		sk = vxlan->vn4_sock->sock->sk;
 
 		rt = vxlan_get_route(vxlan, skb,
 				     rdst ? rdst->remote_ifindex : 0, tos,
-				     dst->sin.sin_addr.s_addr, &saddr,
+				     dst->sin.sin_addr.s_addr,
+				     &src->sin.sin_addr.s_addr,
 				     dst_cache, info);
 		if (IS_ERR(rt)) {
 			netdev_dbg(dev, "no route to %pI4\n",
@@ -2043,13 +2048,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		if (err < 0)
 			goto xmit_tx_error;
 
-		udp_tunnel_xmit_skb(rt, sk, skb, saddr,
+		udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr,
 				    dst->sin.sin_addr.s_addr, tos, ttl, df,
 				    src_port, dst_port, xnet, !udp_sum);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else {
 		struct dst_entry *ndst;
-		struct in6_addr saddr;
 		u32 rt6i_flags;
 
 		if (!vxlan->vn6_sock)
@@ -2058,7 +2062,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 		ndst = vxlan6_get_route(vxlan, skb,
 					rdst ? rdst->remote_ifindex : 0, tos,
-					label, &dst->sin6.sin6_addr, &saddr,
+					label, &dst->sin6.sin6_addr,
+					&src->sin6.sin6_addr,
 					dst_cache, info);
 		if (IS_ERR(ndst)) {
 			netdev_dbg(dev, "no route to %pI6\n",
@@ -2104,7 +2109,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 			return;
 		}
 		udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
-				     &saddr, &dst->sin6.sin6_addr, tos, ttl,
+				     &src->sin6.sin6_addr,
+				     &dst->sin6.sin6_addr, tos, ttl,
 				     label, src_port, dst_port, !udp_sum);
 #endif
 	}
-- 
cgit v0.10.2


From bbec7802c6948c8626b71a4fe31283cb4691c358 Mon Sep 17 00:00:00 2001
From: pravin shelar <pshelar@ovn.org>
Date: Fri, 5 Aug 2016 17:45:37 -0700
Subject: net: vxlan: lwt: Fix vxlan local traffic.

vxlan driver has bypass for local vxlan traffic, but that
depends on information about all VNIs on local system in
vxlan driver. This is not available in case of LWT.
Therefore following patch disable encap bypass for LWT
vxlan traffic.

Fixes: ee122c79d42 ("vxlan: Flow based tunneling").
Reported-by: Jakub Libosvar <jlibosva@redhat.com>
Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index b812234..c0dda6f 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2022,7 +2022,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		}
 
 		/* Bypass encapsulation if the destination is local */
-		if (rt->rt_flags & RTCF_LOCAL &&
+		if (!info && rt->rt_flags & RTCF_LOCAL &&
 		    !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
 			struct vxlan_dev *dst_vxlan;
 
@@ -2082,7 +2082,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 		/* Bypass encapsulation if the destination is local */
 		rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
-		if (rt6i_flags & RTF_LOCAL &&
+		if (!info && rt6i_flags & RTF_LOCAL &&
 		    !(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
 			struct vxlan_dev *dst_vxlan;
 
-- 
cgit v0.10.2


From 1fe323aa1b2390a0c57fb0b06a782f128d49094c Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 7 Aug 2016 14:15:13 +0800
Subject: sctp: use event->chunk when it's valid

Commit 52253db924d1 ("sctp: also point GSO head_skb to the sk when
it's available") used event->chunk->head_skb to get the head_skb in
sctp_ulpevent_set_owner().

But at that moment, the event->chunk was NULL, as it cloned the skb
in sctp_ulpevent_make_rcvmsg(). Therefore, that patch didn't really
work.

This patch is to move the event->chunk initialization before calling
sctp_ulpevent_receive_data() so that it uses event->chunk when it's
valid.

Fixes: 52253db924d1 ("sctp: also point GSO head_skb to the sk when it's available")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 1bc4f71..d85b803 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -702,14 +702,14 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
 	 */
 	sctp_ulpevent_init(event, 0, skb->len + sizeof(struct sk_buff));
 
-	sctp_ulpevent_receive_data(event, asoc);
-
 	/* And hold the chunk as we need it for getting the IP headers
 	 * later in recvmsg
 	 */
 	sctp_chunk_hold(chunk);
 	event->chunk = chunk;
 
+	sctp_ulpevent_receive_data(event, asoc);
+
 	event->stream = ntohs(chunk->subh.data_hdr->stream);
 	event->ssn = ntohs(chunk->subh.data_hdr->ssn);
 	event->ppid = chunk->subh.data_hdr->ppid;
-- 
cgit v0.10.2


From c0c45a6bd7d054efd80c1033bf4285830c72835b Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com>
Date: Mon, 8 Aug 2016 21:57:40 -0400
Subject: qed: Remove the endian-ness conversion for pri_to_tc value.

Endian-ness conversion is not needed for priority-to-TC field as the
field is already being read/written by the driver in big-endian way.

Signed-off-by: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index d0dc28f..6869330 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -483,7 +483,7 @@ qed_dcbx_get_ets_data(struct qed_hwfn *p_hwfn,
 	bw_map[1] = be32_to_cpu(p_ets->tc_bw_tbl[1]);
 	tsa_map[0] = be32_to_cpu(p_ets->tc_tsa_tbl[0]);
 	tsa_map[1] = be32_to_cpu(p_ets->tc_tsa_tbl[1]);
-	pri_map = be32_to_cpu(p_ets->pri_tc_tbl[0]);
+	pri_map = p_ets->pri_tc_tbl[0];
 	for (i = 0; i < QED_MAX_PFC_PRIORITIES; i++) {
 		p_params->ets_tc_bw_tbl[i] = ((u8 *)bw_map)[i];
 		p_params->ets_tc_tsa_tbl[i] = ((u8 *)tsa_map)[i];
@@ -944,7 +944,6 @@ qed_dcbx_set_ets_data(struct qed_hwfn *p_hwfn,
 		val = (((u32)p_params->ets_pri_tc_tbl[i]) << ((7 - i) * 4));
 		p_ets->pri_tc_tbl[0] |= val;
 	}
-	p_ets->pri_tc_tbl[0] = cpu_to_be32(p_ets->pri_tc_tbl[0]);
 	for (i = 0; i < 2; i++) {
 		p_ets->tc_bw_tbl[i] = cpu_to_be32(p_ets->tc_bw_tbl[i]);
 		p_ets->tc_tsa_tbl[i] = cpu_to_be32(p_ets->tc_tsa_tbl[i]);
-- 
cgit v0.10.2


From fb9ea8a9b70c79f38d2758c25d3acff4a2cd5bfb Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com>
Date: Mon, 8 Aug 2016 21:57:41 -0400
Subject: qed: Use ieee mfw-mask to get ethtype in ieee-dcbx mode.

Ethtype value is being read incorrectly in ieee-dcbx mode. Use the
correct mfw mask value.

Signed-off-by: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index 6869330..f07f0ac 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -52,16 +52,33 @@ static bool qed_dcbx_app_ethtype(u32 app_info_bitmap)
 		  DCBX_APP_SF_ETHTYPE);
 }
 
+static bool qed_dcbx_ieee_app_ethtype(u32 app_info_bitmap)
+{
+	u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE);
+
+	/* Old MFW */
+	if (mfw_val == DCBX_APP_SF_IEEE_RESERVED)
+		return qed_dcbx_app_ethtype(app_info_bitmap);
+
+	return !!(mfw_val == DCBX_APP_SF_IEEE_ETHTYPE);
+}
+
 static bool qed_dcbx_app_port(u32 app_info_bitmap)
 {
 	return !!(QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF) ==
 		  DCBX_APP_SF_PORT);
 }
 
-static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 {
-	return !!(qed_dcbx_app_ethtype(app_info_bitmap) &&
-		  proto_id == QED_ETH_TYPE_DEFAULT);
+	bool ethtype;
+
+	if (ieee)
+		ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap);
+	else
+		ethtype = qed_dcbx_app_ethtype(app_info_bitmap);
+
+	return !!(ethtype && (proto_id == QED_ETH_TYPE_DEFAULT));
 }
 
 static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id)
@@ -70,16 +87,28 @@ static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id)
 		  proto_id == QED_TCP_PORT_ISCSI);
 }
 
-static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 {
-	return !!(qed_dcbx_app_ethtype(app_info_bitmap) &&
-		  proto_id == QED_ETH_TYPE_FCOE);
+	bool ethtype;
+
+	if (ieee)
+		ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap);
+	else
+		ethtype = qed_dcbx_app_ethtype(app_info_bitmap);
+
+	return !!(ethtype && (proto_id == QED_ETH_TYPE_FCOE));
 }
 
-static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 {
-	return !!(qed_dcbx_app_ethtype(app_info_bitmap) &&
-		  proto_id == QED_ETH_TYPE_ROCE);
+	bool ethtype;
+
+	if (ieee)
+		ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap);
+	else
+		ethtype = qed_dcbx_app_ethtype(app_info_bitmap);
+
+	return !!(ethtype && (proto_id == QED_ETH_TYPE_ROCE));
 }
 
 static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id)
@@ -164,15 +193,15 @@ qed_dcbx_update_app_info(struct qed_dcbx_results *p_data,
 static bool
 qed_dcbx_get_app_protocol_type(struct qed_hwfn *p_hwfn,
 			       u32 app_prio_bitmap,
-			       u16 id, enum dcbx_protocol_type *type)
+			       u16 id, enum dcbx_protocol_type *type, bool ieee)
 {
-	if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id)) {
+	if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id, ieee)) {
 		*type = DCBX_PROTOCOL_FCOE;
-	} else if (qed_dcbx_roce_tlv(app_prio_bitmap, id)) {
+	} else if (qed_dcbx_roce_tlv(app_prio_bitmap, id, ieee)) {
 		*type = DCBX_PROTOCOL_ROCE;
 	} else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id)) {
 		*type = DCBX_PROTOCOL_ISCSI;
-	} else if (qed_dcbx_default_tlv(app_prio_bitmap, id)) {
+	} else if (qed_dcbx_default_tlv(app_prio_bitmap, id, ieee)) {
 		*type = DCBX_PROTOCOL_ETH;
 	} else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id)) {
 		*type = DCBX_PROTOCOL_ROCE_V2;
@@ -194,17 +223,18 @@ static int
 qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn,
 		     struct qed_dcbx_results *p_data,
 		     struct dcbx_app_priority_entry *p_tbl,
-		     u32 pri_tc_tbl, int count, bool dcbx_enabled)
+		     u32 pri_tc_tbl, int count, u8 dcbx_version)
 {
 	u8 tc, priority_map;
 	enum dcbx_protocol_type type;
+	bool enable, ieee;
 	u16 protocol_id;
 	int priority;
-	bool enable;
 	int i;
 
 	DP_VERBOSE(p_hwfn, QED_MSG_DCB, "Num APP entries = %d\n", count);
 
+	ieee = (dcbx_version == DCBX_CONFIG_VERSION_IEEE);
 	/* Parse APP TLV */
 	for (i = 0; i < count; i++) {
 		protocol_id = QED_MFW_GET_FIELD(p_tbl[i].entry,
@@ -219,7 +249,7 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn,
 
 		tc = QED_DCBX_PRIO2TC(pri_tc_tbl, priority);
 		if (qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry,
-						   protocol_id, &type)) {
+						   protocol_id, &type, ieee)) {
 			/* ETH always have the enable bit reset, as it gets
 			 * vlan information per packet. For other protocols,
 			 * should be set according to the dcbx_enabled
@@ -275,15 +305,12 @@ static int qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn)
 	struct dcbx_ets_feature *p_ets;
 	struct qed_hw_info *p_info;
 	u32 pri_tc_tbl, flags;
-	bool dcbx_enabled;
+	u8 dcbx_version;
 	int num_entries;
 	int rc = 0;
 
-	/* If DCBx version is non zero, then negotiation was
-	 * successfuly performed
-	 */
 	flags = p_hwfn->p_dcbx_info->operational.flags;
-	dcbx_enabled = !!QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION);
+	dcbx_version = QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION);
 
 	p_app = &p_hwfn->p_dcbx_info->operational.features.app;
 	p_tbl = p_app->app_pri_tbl;
@@ -295,13 +322,13 @@ static int qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn)
 	num_entries = QED_MFW_GET_FIELD(p_app->flags, DCBX_APP_NUM_ENTRIES);
 
 	rc = qed_dcbx_process_tlv(p_hwfn, &data, p_tbl, pri_tc_tbl,
-				  num_entries, dcbx_enabled);
+				  num_entries, dcbx_version);
 	if (rc)
 		return rc;
 
 	p_info->num_tc = QED_MFW_GET_FIELD(p_ets->flags, DCBX_ETS_MAX_TCS);
 	data.pf_id = p_hwfn->rel_pf_id;
-	data.dcbx_enabled = dcbx_enabled;
+	data.dcbx_enabled = !!dcbx_version;
 
 	qed_dcbx_dp_protocol(p_hwfn, &data);
 
@@ -400,7 +427,7 @@ static void
 qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn,
 		      struct dcbx_app_priority_feature *p_app,
 		      struct dcbx_app_priority_entry *p_tbl,
-		      struct qed_dcbx_params *p_params)
+		      struct qed_dcbx_params *p_params, bool ieee)
 {
 	struct qed_app_entry *entry;
 	u8 pri_map;
@@ -422,7 +449,7 @@ qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn,
 						    DCBX_APP_PROTOCOL_ID);
 		qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry,
 					       entry->proto_id,
-					       &entry->proto_type);
+					       &entry->proto_type, ieee);
 	}
 
 	DP_VERBOSE(p_hwfn, QED_MSG_DCB,
@@ -500,9 +527,9 @@ qed_dcbx_get_common_params(struct qed_hwfn *p_hwfn,
 			   struct dcbx_app_priority_feature *p_app,
 			   struct dcbx_app_priority_entry *p_tbl,
 			   struct dcbx_ets_feature *p_ets,
-			   u32 pfc, struct qed_dcbx_params *p_params)
+			   u32 pfc, struct qed_dcbx_params *p_params, bool ieee)
 {
-	qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params);
+	qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params, ieee);
 	qed_dcbx_get_ets_data(p_hwfn, p_ets, p_params);
 	qed_dcbx_get_pfc_data(p_hwfn, pfc, p_params);
 }
@@ -516,7 +543,7 @@ qed_dcbx_get_local_params(struct qed_hwfn *p_hwfn,
 	p_feat = &p_hwfn->p_dcbx_info->local_admin.features;
 	qed_dcbx_get_common_params(p_hwfn, &p_feat->app,
 				   p_feat->app.app_pri_tbl, &p_feat->ets,
-				   p_feat->pfc, &params->local.params);
+				   p_feat->pfc, &params->local.params, false);
 	params->local.valid = true;
 }
 
@@ -529,7 +556,7 @@ qed_dcbx_get_remote_params(struct qed_hwfn *p_hwfn,
 	p_feat = &p_hwfn->p_dcbx_info->remote.features;
 	qed_dcbx_get_common_params(p_hwfn, &p_feat->app,
 				   p_feat->app.app_pri_tbl, &p_feat->ets,
-				   p_feat->pfc, &params->remote.params);
+				   p_feat->pfc, &params->remote.params, false);
 	params->remote.valid = true;
 }
 
@@ -574,7 +601,8 @@ qed_dcbx_get_operational_params(struct qed_hwfn *p_hwfn,
 
 	qed_dcbx_get_common_params(p_hwfn, &p_feat->app,
 				   p_feat->app.app_pri_tbl, &p_feat->ets,
-				   p_feat->pfc, &params->operational.params);
+				   p_feat->pfc, &params->operational.params,
+				   p_operational->ieee);
 	qed_dcbx_get_priority_info(p_hwfn, &p_operational->app_prio, p_results);
 	err = QED_MFW_GET_FIELD(p_feat->app.flags, DCBX_APP_ERROR);
 	p_operational->err = err;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 5927840..6f9d3b8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -6850,6 +6850,14 @@ struct dcbx_app_priority_entry {
 #define DCBX_APP_SF_SHIFT		8
 #define DCBX_APP_SF_ETHTYPE		0
 #define DCBX_APP_SF_PORT		1
+#define DCBX_APP_SF_IEEE_MASK		0x0000f000
+#define DCBX_APP_SF_IEEE_SHIFT		12
+#define DCBX_APP_SF_IEEE_RESERVED	0
+#define DCBX_APP_SF_IEEE_ETHTYPE	1
+#define DCBX_APP_SF_IEEE_TCP_PORT	2
+#define DCBX_APP_SF_IEEE_UDP_PORT	3
+#define DCBX_APP_SF_IEEE_TCP_UDP_PORT	4
+
 #define DCBX_APP_PROTOCOL_ID_MASK	0xffff0000
 #define DCBX_APP_PROTOCOL_ID_SHIFT	16
 };
-- 
cgit v0.10.2


From 59bcb7972fc5d53a621ee6b2c3cf1654cebb3dc5 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com>
Date: Mon, 8 Aug 2016 21:57:42 -0400
Subject: qed: Add dcbx app support for IEEE Selection Field.

MFW now supports the Selection field for IEEE mode. Add driver changes to
use the newer MFW masks to read/write the port-id value.

Signed-off-by: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index f07f0ac..b157a6a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -69,6 +69,17 @@ static bool qed_dcbx_app_port(u32 app_info_bitmap)
 		  DCBX_APP_SF_PORT);
 }
 
+static bool qed_dcbx_ieee_app_port(u32 app_info_bitmap, u8 type)
+{
+	u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE);
+
+	/* Old MFW */
+	if (mfw_val == DCBX_APP_SF_IEEE_RESERVED)
+		return qed_dcbx_app_port(app_info_bitmap);
+
+	return !!(mfw_val == type || mfw_val == DCBX_APP_SF_IEEE_TCP_UDP_PORT);
+}
+
 static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 {
 	bool ethtype;
@@ -81,10 +92,17 @@ static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 	return !!(ethtype && (proto_id == QED_ETH_TYPE_DEFAULT));
 }
 
-static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 {
-	return !!(qed_dcbx_app_port(app_info_bitmap) &&
-		  proto_id == QED_TCP_PORT_ISCSI);
+	bool port;
+
+	if (ieee)
+		port = qed_dcbx_ieee_app_port(app_info_bitmap,
+					      DCBX_APP_SF_IEEE_TCP_PORT);
+	else
+		port = qed_dcbx_app_port(app_info_bitmap);
+
+	return !!(port && (proto_id == QED_TCP_PORT_ISCSI));
 }
 
 static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
@@ -111,10 +129,17 @@ static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 	return !!(ethtype && (proto_id == QED_ETH_TYPE_ROCE));
 }
 
-static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 {
-	return !!(qed_dcbx_app_port(app_info_bitmap) &&
-		  proto_id == QED_UDP_PORT_TYPE_ROCE_V2);
+	bool port;
+
+	if (ieee)
+		port = qed_dcbx_ieee_app_port(app_info_bitmap,
+					      DCBX_APP_SF_IEEE_UDP_PORT);
+	else
+		port = qed_dcbx_app_port(app_info_bitmap);
+
+	return !!(port && (proto_id == QED_UDP_PORT_TYPE_ROCE_V2));
 }
 
 static void
@@ -199,11 +224,11 @@ qed_dcbx_get_app_protocol_type(struct qed_hwfn *p_hwfn,
 		*type = DCBX_PROTOCOL_FCOE;
 	} else if (qed_dcbx_roce_tlv(app_prio_bitmap, id, ieee)) {
 		*type = DCBX_PROTOCOL_ROCE;
-	} else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id)) {
+	} else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id, ieee)) {
 		*type = DCBX_PROTOCOL_ISCSI;
 	} else if (qed_dcbx_default_tlv(app_prio_bitmap, id, ieee)) {
 		*type = DCBX_PROTOCOL_ETH;
-	} else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id)) {
+	} else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id, ieee)) {
 		*type = DCBX_PROTOCOL_ROCE_V2;
 	} else {
 		*type = DCBX_MAX_PROTOCOL_TYPE;
@@ -441,8 +466,39 @@ qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn,
 						      DCBX_APP_NUM_ENTRIES);
 	for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) {
 		entry = &p_params->app_entry[i];
-		entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry,
-						     DCBX_APP_SF));
+		if (ieee) {
+			u8 sf_ieee;
+			u32 val;
+
+			sf_ieee = QED_MFW_GET_FIELD(p_tbl[i].entry,
+						    DCBX_APP_SF_IEEE);
+			switch (sf_ieee) {
+			case DCBX_APP_SF_IEEE_RESERVED:
+				/* Old MFW */
+				val = QED_MFW_GET_FIELD(p_tbl[i].entry,
+							DCBX_APP_SF);
+				entry->sf_ieee = val ?
+				    QED_DCBX_SF_IEEE_TCP_UDP_PORT :
+				    QED_DCBX_SF_IEEE_ETHTYPE;
+				break;
+			case DCBX_APP_SF_IEEE_ETHTYPE:
+				entry->sf_ieee = QED_DCBX_SF_IEEE_ETHTYPE;
+				break;
+			case DCBX_APP_SF_IEEE_TCP_PORT:
+				entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_PORT;
+				break;
+			case DCBX_APP_SF_IEEE_UDP_PORT:
+				entry->sf_ieee = QED_DCBX_SF_IEEE_UDP_PORT;
+				break;
+			case DCBX_APP_SF_IEEE_TCP_UDP_PORT:
+				entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_UDP_PORT;
+				break;
+			}
+		} else {
+			entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry,
+							     DCBX_APP_SF));
+		}
+
 		pri_map = QED_MFW_GET_FIELD(p_tbl[i].entry, DCBX_APP_PRI_MAP);
 		entry->prio = ffs(pri_map) - 1;
 		entry->proto_id = QED_MFW_GET_FIELD(p_tbl[i].entry,
@@ -981,7 +1037,7 @@ qed_dcbx_set_ets_data(struct qed_hwfn *p_hwfn,
 static void
 qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn,
 		      struct dcbx_app_priority_feature *p_app,
-		      struct qed_dcbx_params *p_params)
+		      struct qed_dcbx_params *p_params, bool ieee)
 {
 	u32 *entry;
 	int i;
@@ -1002,12 +1058,36 @@ qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn,
 
 	for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) {
 		entry = &p_app->app_pri_tbl[i].entry;
-		*entry &= ~DCBX_APP_SF_MASK;
-		if (p_params->app_entry[i].ethtype)
-			*entry |= ((u32)DCBX_APP_SF_ETHTYPE <<
-				   DCBX_APP_SF_SHIFT);
-		else
-			*entry |= ((u32)DCBX_APP_SF_PORT << DCBX_APP_SF_SHIFT);
+		if (ieee) {
+			*entry &= ~DCBX_APP_SF_IEEE_MASK;
+			switch (p_params->app_entry[i].sf_ieee) {
+			case QED_DCBX_SF_IEEE_ETHTYPE:
+				*entry |= ((u32)DCBX_APP_SF_IEEE_ETHTYPE <<
+					   DCBX_APP_SF_IEEE_SHIFT);
+				break;
+			case QED_DCBX_SF_IEEE_TCP_PORT:
+				*entry |= ((u32)DCBX_APP_SF_IEEE_TCP_PORT <<
+					   DCBX_APP_SF_IEEE_SHIFT);
+				break;
+			case QED_DCBX_SF_IEEE_UDP_PORT:
+				*entry |= ((u32)DCBX_APP_SF_IEEE_UDP_PORT <<
+					   DCBX_APP_SF_IEEE_SHIFT);
+				break;
+			case QED_DCBX_SF_IEEE_TCP_UDP_PORT:
+				*entry |= ((u32)DCBX_APP_SF_IEEE_TCP_UDP_PORT <<
+					   DCBX_APP_SF_IEEE_SHIFT);
+				break;
+			}
+		} else {
+			*entry &= ~DCBX_APP_SF_MASK;
+			if (p_params->app_entry[i].ethtype)
+				*entry |= ((u32)DCBX_APP_SF_ETHTYPE <<
+					   DCBX_APP_SF_SHIFT);
+			else
+				*entry |= ((u32)DCBX_APP_SF_PORT <<
+					   DCBX_APP_SF_SHIFT);
+		}
+
 		*entry &= ~DCBX_APP_PROTOCOL_ID_MASK;
 		*entry |= ((u32)p_params->app_entry[i].proto_id <<
 			   DCBX_APP_PROTOCOL_ID_SHIFT);
@@ -1022,15 +1102,19 @@ qed_dcbx_set_local_params(struct qed_hwfn *p_hwfn,
 			  struct dcbx_local_params *local_admin,
 			  struct qed_dcbx_set *params)
 {
+	bool ieee = false;
+
 	local_admin->flags = 0;
 	memcpy(&local_admin->features,
 	       &p_hwfn->p_dcbx_info->operational.features,
 	       sizeof(local_admin->features));
 
-	if (params->enabled)
+	if (params->enabled) {
 		local_admin->config = params->ver_num;
-	else
+		ieee = !!(params->ver_num & DCBX_CONFIG_VERSION_IEEE);
+	} else {
 		local_admin->config = DCBX_CONFIG_VERSION_DISABLED;
+	}
 
 	if (params->override_flags & QED_DCBX_OVERRIDE_PFC_CFG)
 		qed_dcbx_set_pfc_data(p_hwfn, &local_admin->features.pfc,
@@ -1042,7 +1126,7 @@ qed_dcbx_set_local_params(struct qed_hwfn *p_hwfn,
 
 	if (params->override_flags & QED_DCBX_OVERRIDE_APP_CFG)
 		qed_dcbx_set_app_data(p_hwfn, &local_admin->features.app,
-				      &params->config.params);
+				      &params->config.params, ieee);
 }
 
 int qed_dcbx_config_params(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index b1e3c57..d6c4177 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -70,8 +70,16 @@ struct qed_dbcx_pfc_params {
 	u8 max_tc;
 };
 
+enum qed_dcbx_sf_ieee_type {
+	QED_DCBX_SF_IEEE_ETHTYPE,
+	QED_DCBX_SF_IEEE_TCP_PORT,
+	QED_DCBX_SF_IEEE_UDP_PORT,
+	QED_DCBX_SF_IEEE_TCP_UDP_PORT
+};
+
 struct qed_app_entry {
 	bool ethtype;
+	enum qed_dcbx_sf_ieee_type sf_ieee;
 	bool enabled;
 	u8 prio;
 	u16 proto_id;
-- 
cgit v0.10.2


From 1d7406ce7bdfc48cd7390f793d23ef81fff75880 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com>
Date: Mon, 8 Aug 2016 21:57:43 -0400
Subject: qed: Update app count when adding a new dcbx app entry to the table.

App count is not updated while adding new app entry to the dcbx app table.

Signed-off-by: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index b157a6a..226cb08 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -1707,8 +1707,10 @@ static int qed_dcbnl_setapp(struct qed_dev *cdev,
 		if ((entry->ethtype == ethtype) && (entry->proto_id == idval))
 			break;
 		/* First empty slot */
-		if (!entry->proto_id)
+		if (!entry->proto_id) {
+			dcbx_set.config.params.num_app_entries++;
 			break;
+		}
 	}
 
 	if (i == QED_DCBX_MAX_APP_PROTOCOL) {
@@ -2228,8 +2230,10 @@ int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app)
 		    (entry->proto_id == app->protocol))
 			break;
 		/* First empty slot */
-		if (!entry->proto_id)
+		if (!entry->proto_id) {
+			dcbx_set.config.params.num_app_entries++;
 			break;
+		}
 	}
 
 	if (i == QED_DCBX_MAX_APP_PROTOCOL) {
-- 
cgit v0.10.2


From b173a28f62cf929324a8a6adcc45adadce311d16 Mon Sep 17 00:00:00 2001
From: Liping Zhang <liping.zhang@spreadtrum.com>
Date: Mon, 8 Aug 2016 21:57:58 +0800
Subject: netfilter: nf_ct_expect: remove the redundant slash when policy name
 is empty

The 'name' filed in struct nf_conntrack_expect_policy{} is not a
pointer, so check it is NULL or not will always return true. Even if the
name is empty, slash will always be displayed like follows:
  # cat /proc/net/nf_conntrack_expect
  297 l3proto = 2 proto=6 src=1.1.1.1 dst=2.2.2.2 sport=1 dport=1025 ftp/
                                                                        ^

Fixes: 3a8fc53a45c4 ("netfilter: nf_ct_helper: allocate 16 bytes for the helper and policy names")
Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 9e36931..f8dbacf 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -574,7 +574,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
 	helper = rcu_dereference(nfct_help(expect->master)->helper);
 	if (helper) {
 		seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
-		if (helper->expect_policy[expect->class].name)
+		if (helper->expect_policy[expect->class].name[0])
 			seq_printf(s, "/%s",
 				   helper->expect_policy[expect->class].name);
 	}
-- 
cgit v0.10.2


From b18bcb0019cf57a3db0917b77e65c29bd9b00f54 Mon Sep 17 00:00:00 2001
From: Liping Zhang <liping.zhang@spreadtrum.com>
Date: Mon, 8 Aug 2016 22:03:40 +0800
Subject: netfilter: nfnetlink_queue: fix memory leak when attach expectation
 successfully

User can use NFQA_EXP to attach expectations to conntracks, but we
forget to put back nf_conntrack_expect when it is inserted successfully,
i.e. in this normal case, expect's use refcnt will be 3. So even we
unlink it and put it back later, the use refcnt is still 1, then the
memory will be leaked forever.

Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 050bb34..b9bfe64 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2362,12 +2362,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
 		return PTR_ERR(exp);
 
 	err = nf_ct_expect_related_report(exp, portid, report);
-	if (err < 0) {
-		nf_ct_expect_put(exp);
-		return err;
-	}
-
-	return 0;
+	nf_ct_expect_put(exp);
+	return err;
 }
 
 static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct,
-- 
cgit v0.10.2


From 00a3101f561816e58de054a470484996f78eb5eb Mon Sep 17 00:00:00 2001
From: Liping Zhang <liping.zhang@spreadtrum.com>
Date: Mon, 8 Aug 2016 22:07:27 +0800
Subject: netfilter: nfnetlink_queue: reject verdict request from different
 portid

Like NFQNL_MSG_VERDICT_BATCH do, we should also reject the verdict
request when the portid is not same with the initial portid(maybe
from another process).

Fixes: 97d32cf9440d ("netfilter: nfnetlink_queue: batch verdict support")
Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 5d36a09..f49f450 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1145,10 +1145,8 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
 	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
 	int err;
 
-	queue = instance_lookup(q, queue_num);
-	if (!queue)
-		queue = verdict_instance_lookup(q, queue_num,
-						NETLINK_CB(skb).portid);
+	queue = verdict_instance_lookup(q, queue_num,
+					NETLINK_CB(skb).portid);
 	if (IS_ERR(queue))
 		return PTR_ERR(queue);
 
-- 
cgit v0.10.2


From aa0c2c68abd1e2915656cc81afdb195bc8595dec Mon Sep 17 00:00:00 2001
From: Liping Zhang <liping.zhang@spreadtrum.com>
Date: Mon, 8 Aug 2016 22:10:26 +0800
Subject: netfilter: ctnetlink: reject new conntrack request with different
 l4proto

Currently, user can add a conntrack with different l4proto via nfnetlink.
For example, original tuple is TCP while reply tuple is SCTP. This is
invalid combination, we should report EINVAL to userspace.

Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index b9bfe64..fdfc71f 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1894,6 +1894,8 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
 
 			if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY])
 				return -EINVAL;
+			if (otuple.dst.protonum != rtuple.dst.protonum)
+				return -EINVAL;
 
 			ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple,
 							&rtuple, u3);
-- 
cgit v0.10.2


From 55cae7a403f3b52de3dd6e4a614582541c9631af Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 8 Aug 2016 12:13:45 +0200
Subject: rxrpc: fix uninitialized pointer dereference in debug code

A newly added bugfix caused an uninitialized variable to be
used for printing debug output. This is harmless as long
as the debug setting is disabled, but otherwise leads to an
immediate crash.

gcc warns about this when -Wmaybe-uninitialized is enabled:

net/rxrpc/call_object.c: In function 'rxrpc_release_call':
net/rxrpc/call_object.c:496:163: error: 'sp' may be used uninitialized in this function [-Werror=maybe-uninitialized]

The initialization was removed but one of the users remains.
This adds back the initialization.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 372ee16386bb ("rxrpc: Fix races between skb free, ACK generation and replying")
Signed-off-by: David Howells <dhowells@redhat.com>

diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index c47f14f..e8c953c 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -493,6 +493,7 @@ void rxrpc_release_call(struct rxrpc_call *call)
 		       (skb = skb_dequeue(&call->rx_oos_queue))) {
 			spin_unlock_bh(&call->lock);
 
+			sp = rxrpc_skb(skb);
 			_debug("- zap %s %%%u #%u",
 			       rxrpc_pkts[sp->hdr.type],
 			       sp->hdr.serial, sp->hdr.seq);
-- 
cgit v0.10.2


From 17b963e319449f709e78dc1ef445d797a13eecbc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 8 Aug 2016 13:06:41 +0100
Subject: rxrpc: Need to flag call as being released on connect failure

If rxrpc_new_client_call() fails to make a connection, the call record that
it allocated needs to be marked as RXRPC_CALL_RELEASED before it is passed
to rxrpc_put_call() to indicate that it no longer has any attachment to the
AF_RXRPC socket.

Without this, an assertion failure may occur at:

	net/rxrpc/call_object:635

Signed-off-by: David Howells <dhowells@redhat.com>

diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index e8c953c..ae057e0 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -275,6 +275,7 @@ error:
 	list_del_init(&call->link);
 	write_unlock_bh(&rxrpc_call_lock);
 
+	set_bit(RXRPC_CALL_RELEASED, &call->flags);
 	call->state = RXRPC_CALL_DEAD;
 	rxrpc_put_call(call);
 	_leave(" = %d", ret);
@@ -287,6 +288,7 @@ error:
 	 */
 found_user_ID_now_present:
 	write_unlock(&rx->call_lock);
+	set_bit(RXRPC_CALL_RELEASED, &call->flags);
 	call->state = RXRPC_CALL_DEAD;
 	rxrpc_put_call(call);
 	_leave(" = -EEXIST [%p]", call);
-- 
cgit v0.10.2


From f9dc575725baeaad8eb5262589f9aebeeaf13433 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 8 Aug 2016 10:27:26 +0100
Subject: rxrpc: Don't access connection from call if pointer is NULL

The call state machine processor sets up the message parameters for a UDP
message that it might need to transmit in advance on the basis that there's
a very good chance it's going to have to transmit either an ACK or an
ABORT.  This requires it to look in the connection struct to retrieve some
of the parameters.

However, if the call is complete, the call connection pointer may be NULL
to dissuade the processor from transmitting a message.  However, there are
some situations where the processor is still going to be called - and it's
still going to set up message parameters whether it needs them or not.

This results in a NULL pointer dereference at:

	net/rxrpc/call_event.c:837

To fix this, skip the message pre-initialisation if there's no connection
attached.

Signed-off-by: David Howells <dhowells@redhat.com>

diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index f5e9916..e60cf65 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -837,6 +837,9 @@ void rxrpc_process_call(struct work_struct *work)
 		return;
 	}
 
+	if (!call->conn)
+		goto skip_msg_init;
+
 	/* there's a good chance we're going to have to send a message, so set
 	 * one up in advance */
 	msg.msg_name	= &call->conn->params.peer->srx.transport;
@@ -859,6 +862,7 @@ void rxrpc_process_call(struct work_struct *work)
 	memset(iov, 0, sizeof(iov));
 	iov[0].iov_base	= &whdr;
 	iov[0].iov_len	= sizeof(whdr);
+skip_msg_init:
 
 	/* deal with events of a final nature */
 	if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) {
-- 
cgit v0.10.2


From 2e7e9758b2b680fa615d5cf70b7af416e96162d2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 9 Aug 2016 10:11:48 +0100
Subject: rxrpc: Once packet posted in data_ready, don't retry posting

Once a packet has been posted to a connection in the data_ready handler, we
mustn't try reposting if we then find that the connection is dying as the
refcount has been given over to the dying connection and the packet might
no longer exist.

Losing the packet isn't a problem as the peer will retransmit.

Signed-off-by: David Howells <dhowells@redhat.com>

diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 9e0f58e..04afdc0 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -567,13 +567,13 @@ done:
  * post connection-level events to the connection
  * - this includes challenges, responses and some aborts
  */
-static bool rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
 				      struct sk_buff *skb)
 {
 	_enter("%p,%p", conn, skb);
 
 	skb_queue_tail(&conn->rx_queue, skb);
-	return rxrpc_queue_conn(conn);
+	rxrpc_queue_conn(conn);
 }
 
 /*
@@ -694,7 +694,6 @@ void rxrpc_data_ready(struct sock *sk)
 
 	rcu_read_lock();
 
-retry_find_conn:
 	conn = rxrpc_find_connection_rcu(local, skb);
 	if (!conn)
 		goto cant_route_call;
@@ -702,8 +701,7 @@ retry_find_conn:
 	if (sp->hdr.callNumber == 0) {
 		/* Connection-level packet */
 		_debug("CONN %p {%d}", conn, conn->debug_id);
-		if (!rxrpc_post_packet_to_conn(conn, skb))
-			goto retry_find_conn;
+		rxrpc_post_packet_to_conn(conn, skb);
 	} else {
 		/* Call-bound packets are routed by connection channel. */
 		unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK;
-- 
cgit v0.10.2


From 50fd85a1f94753b86a7f540794dfd4f799e81ac2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 9 Aug 2016 11:30:43 +0100
Subject: rxrpc: Fix a use-after-push in data_ready handler

Fix a use of a packet after it has been enqueued onto the packet processing
queue in the data_ready handler.  Once on a call's Rx queue, we mustn't
touch it any more as it may be dequeued and freed by the call processor
running on a work queue.

Save the values we need before enqueuing.

Without this, we can get an oops like the following:

BUG: unable to handle kernel NULL pointer dereference at 000000000000009c
IP: [<ffffffffa01854e8>] rxrpc_fast_process_packet+0x724/0xa11 [af_rxrpc]
PGD 0
Oops: 0000 [#1] SMP
Modules linked in: kafs(E) af_rxrpc(E) [last unloaded: af_rxrpc]
CPU: 2 PID: 0 Comm: swapper/2 Tainted: G            E   4.7.0-fsdevel+ #1336
Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014
task: ffff88040d6863c0 task.stack: ffff88040d68c000
RIP: 0010:[<ffffffffa01854e8>]  [<ffffffffa01854e8>] rxrpc_fast_process_packet+0x724/0xa11 [af_rxrpc]
RSP: 0018:ffff88041fb03a78  EFLAGS: 00010246
RAX: ffffffffffffffff RBX: ffff8803ff195b00 RCX: 0000000000000001
RDX: ffffffffa01854d1 RSI: 0000000000000008 RDI: ffff8803ff195b00
RBP: ffff88041fb03ab0 R08: 0000000000000000 R09: 0000000000000001
R10: ffff88041fb038c8 R11: 0000000000000000 R12: ffff880406874800
R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000
FS:  0000000000000000(0000) GS:ffff88041fb00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000000000009c CR3: 0000000001c14000 CR4: 00000000001406e0
Stack:
 ffff8803ff195ea0 ffff880408348800 ffff880406874800 ffff8803ff195b00
 ffff880408348800 ffff8803ff195ed8 0000000000000000 ffff88041fb03af0
 ffffffffa0186072 0000000000000000 ffff8804054da000 0000000000000000
Call Trace:
 <IRQ>
 [<ffffffffa0186072>] rxrpc_data_ready+0x89d/0xbae [af_rxrpc]
 [<ffffffff814c94d7>] __sock_queue_rcv_skb+0x24c/0x2b2
 [<ffffffff8155c59a>] __udp_queue_rcv_skb+0x4b/0x1bd
 [<ffffffff8155e048>] udp_queue_rcv_skb+0x281/0x4db
 [<ffffffff8155ea8f>] __udp4_lib_rcv+0x7ed/0x963
 [<ffffffff8155ef9a>] udp_rcv+0x15/0x17
 [<ffffffff81531d86>] ip_local_deliver_finish+0x1c3/0x318
 [<ffffffff81532544>] ip_local_deliver+0xbb/0xc4
 [<ffffffff81531bc3>] ? inet_del_offload+0x40/0x40
 [<ffffffff815322a9>] ip_rcv_finish+0x3ce/0x42c
 [<ffffffff81532851>] ip_rcv+0x304/0x33d
 [<ffffffff81531edb>] ? ip_local_deliver_finish+0x318/0x318
 [<ffffffff814dff9d>] __netif_receive_skb_core+0x601/0x6e8
 [<ffffffff814e072e>] __netif_receive_skb+0x13/0x54
 [<ffffffff814e082a>] netif_receive_skb_internal+0xbb/0x17c
 [<ffffffff814e1838>] napi_gro_receive+0xf9/0x1bd
 [<ffffffff8144eb9f>] rtl8169_poll+0x32b/0x4a8
 [<ffffffff814e1c7b>] net_rx_action+0xe8/0x357
 [<ffffffff81051074>] __do_softirq+0x1aa/0x414
 [<ffffffff810514ab>] irq_exit+0x3d/0xb0
 [<ffffffff810184a2>] do_IRQ+0xe4/0xfc
 [<ffffffff81612053>] common_interrupt+0x93/0x93
 <EOI>
 [<ffffffff814af837>] ? cpuidle_enter_state+0x1ad/0x2be
 [<ffffffff814af832>] ? cpuidle_enter_state+0x1a8/0x2be
 [<ffffffff814af96a>] cpuidle_enter+0x12/0x14
 [<ffffffff8108956f>] call_cpuidle+0x39/0x3b
 [<ffffffff81089855>] cpu_startup_entry+0x230/0x35d
 [<ffffffff810312ea>] start_secondary+0xf4/0xf7

Signed-off-by: David Howells <dhowells@redhat.com>

diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 04afdc0..7897190 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -124,11 +124,15 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
 	struct rxrpc_skb_priv *sp;
 	bool terminal;
 	int ret, ackbit, ack;
+	u32 serial;
+	u8 flags;
 
 	_enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq);
 
 	sp = rxrpc_skb(skb);
 	ASSERTCMP(sp->call, ==, NULL);
+	flags = sp->hdr.flags;
+	serial = sp->hdr.serial;
 
 	spin_lock(&call->lock);
 
@@ -192,8 +196,8 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
 	sp->call = call;
 	rxrpc_get_call(call);
 	atomic_inc(&call->skb_count);
-	terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
-		    !(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
+	terminal = ((flags & RXRPC_LAST_PACKET) &&
+		    !(flags & RXRPC_CLIENT_INITIATED));
 	ret = rxrpc_queue_rcv_skb(call, skb, false, terminal);
 	if (ret < 0) {
 		if (ret == -ENOMEM || ret == -ENOBUFS) {
@@ -205,12 +209,13 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
 	}
 
 	skb = NULL;
+	sp = NULL;
 
 	_debug("post #%u", seq);
 	ASSERTCMP(call->rx_data_post, ==, seq);
 	call->rx_data_post++;
 
-	if (sp->hdr.flags & RXRPC_LAST_PACKET)
+	if (flags & RXRPC_LAST_PACKET)
 		set_bit(RXRPC_CALL_RCVD_LAST, &call->flags);
 
 	/* if we've reached an out of sequence packet then we need to drain
@@ -226,7 +231,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
 
 	spin_unlock(&call->lock);
 	atomic_inc(&call->ackr_not_idle);
-	rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, sp->hdr.serial, false);
+	rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false);
 	_leave(" = 0 [posted]");
 	return 0;
 
@@ -239,7 +244,7 @@ out:
 
 discard_and_ack:
 	_debug("discard and ACK packet %p", skb);
-	__rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
+	__rxrpc_propose_ACK(call, ack, serial, true);
 discard:
 	spin_unlock(&call->lock);
 	rxrpc_free_skb(skb);
@@ -247,7 +252,7 @@ discard:
 	return 0;
 
 enqueue_and_ack:
-	__rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
+	__rxrpc_propose_ACK(call, ack, serial, true);
 enqueue_packet:
 	_net("defer skb %p", skb);
 	spin_unlock(&call->lock);
-- 
cgit v0.10.2


From 992c273af9d9f12a2e470731f69bb3baa694562b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 9 Aug 2016 16:58:42 +0100
Subject: rxrpc: Free packets discarded in data_ready

Under certain conditions, the data_ready handler will discard a packet.
These need to be freed.

Signed-off-by: David Howells <dhowells@redhat.com>

diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 7897190..70bb778 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -744,6 +744,8 @@ cant_route_call:
 	if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
 		_debug("reject type %d",sp->hdr.type);
 		rxrpc_reject_packet(local, skb);
+	} else {
+		rxrpc_free_skb(skb);
 	}
 	_leave(" [no call]");
 	return;
-- 
cgit v0.10.2


From a5d0dc810abf3d6b241777467ee1d6efb02575fc Mon Sep 17 00:00:00 2001
From: Lance Richardson <lrichard@redhat.com>
Date: Tue, 9 Aug 2016 15:29:42 -0400
Subject: vti: flush x-netns xfrm cache when vti interface is removed

When executing the script included below, the netns delete operation
hangs with the following message (repeated at 10 second intervals):

  kernel:unregister_netdevice: waiting for lo to become free. Usage count = 1

This occurs because a reference to the lo interface in the "secure" netns
is still held by a dst entry in the xfrm bundle cache in the init netns.

Address this problem by garbage collecting the tunnel netns flow cache
when a cross-namespace vti interface receives a NETDEV_DOWN notification.

A more detailed description of the problem scenario (referencing commands
in the script below):

(1) ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1

  The vti_test interface is created in the init namespace. vti_tunnel_init()
  attaches a struct ip_tunnel to the vti interface's netdev_priv(dev),
  setting the tunnel net to &init_net.

(2) ip link set vti_test netns secure

  The vti_test interface is moved to the "secure" netns. Note that
  the associated struct ip_tunnel still has tunnel->net set to &init_net.

(3) ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1

  The first packet sent using the vti device causes xfrm_lookup() to be
  called as follows:

      dst = xfrm_lookup(tunnel->net, skb_dst(skb), fl, NULL, 0);

  Note that tunnel->net is the init namespace, while skb_dst(skb) references
  the vti_test interface in the "secure" namespace. The returned dst
  references an interface in the init namespace.

  Also note that the first parameter to xfrm_lookup() determines which flow
  cache is used to store the computed xfrm bundle, so after xfrm_lookup()
  returns there will be a cached bundle in the init namespace flow cache
  with a dst referencing a device in the "secure" namespace.

(4) ip netns del secure

  Kernel begins to delete the "secure" namespace.  At some point the
  vti_test interface is deleted, at which point dst_ifdown() changes
  the dst->dev in the cached xfrm bundle flow from vti_test to lo (still
  in the "secure" namespace however).
  Since nothing has happened to cause the init namespace's flow cache
  to be garbage collected, this dst remains attached to the flow cache,
  so the kernel loops waiting for the last reference to lo to go away.

<Begin script>
ip link add br1 type bridge
ip link set dev br1 up
ip addr add dev br1 1.1.1.1/8

ip netns add secure
ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1
ip link set vti_test netns secure
ip netns exec secure ip link set vti_test up
ip netns exec secure ip link s lo up
ip netns exec secure ip addr add dev lo 192.168.100.1/24
ip netns exec secure ip route add 192.168.200.0/24 dev vti_test
ip xfrm policy flush
ip xfrm state flush
ip xfrm policy add dir out tmpl src 1.1.1.1 dst 1.1.1.2 \
   proto esp mode tunnel mark 1
ip xfrm policy add dir in tmpl src 1.1.1.2 dst 1.1.1.1 \
   proto esp mode tunnel mark 1
ip xfrm state add src 1.1.1.1 dst 1.1.1.2 proto esp spi 1 \
   mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788
ip xfrm state add src 1.1.1.2 dst 1.1.1.1 proto esp spi 1 \
   mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788

ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1

ip netns del secure
<End script>

Reported-by: Hangbin Liu <haliu@redhat.com>
Reported-by: Jan Tluka <jtluka@redhat.com>
Signed-off-by: Lance Richardson <lrichard@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index a917903..cc701fa 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -557,6 +557,33 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
 	.get_link_net	= ip_tunnel_get_link_net,
 };
 
+static bool is_vti_tunnel(const struct net_device *dev)
+{
+	return dev->netdev_ops == &vti_netdev_ops;
+}
+
+static int vti_device_event(struct notifier_block *unused,
+			    unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+
+	if (!is_vti_tunnel(dev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_DOWN:
+		if (!net_eq(tunnel->net, dev_net(dev)))
+			xfrm_garbage_collect(tunnel->net);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block vti_notifier_block __read_mostly = {
+	.notifier_call = vti_device_event,
+};
+
 static int __init vti_init(void)
 {
 	const char *msg;
@@ -564,6 +591,8 @@ static int __init vti_init(void)
 
 	pr_info("IPv4 over IPsec tunneling driver\n");
 
+	register_netdevice_notifier(&vti_notifier_block);
+
 	msg = "tunnel device";
 	err = register_pernet_device(&vti_net_ops);
 	if (err < 0)
@@ -596,6 +625,7 @@ xfrm_proto_ah_failed:
 xfrm_proto_esp_failed:
 	unregister_pernet_device(&vti_net_ops);
 pernet_dev_failed:
+	unregister_netdevice_notifier(&vti_notifier_block);
 	pr_err("vti init: failed to register %s\n", msg);
 	return err;
 }
@@ -607,6 +637,7 @@ static void __exit vti_fini(void)
 	xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
 	xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
 	unregister_pernet_device(&vti_net_ops);
+	unregister_netdevice_notifier(&vti_notifier_block);
 }
 
 module_init(vti_init);
-- 
cgit v0.10.2


From 254a49d5139a70828d652ef4faec40763993e403 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Tue, 9 Aug 2016 15:09:44 +0300
Subject: drivers: net: cpsw: fix kmemleak false-positive reports for sk
 buffers

Kmemleak reports following false positive memory leaks for each sk
buffers allocated by CPSW (__netdev_alloc_skb_ip_align()) in
cpsw_ndo_open() and cpsw_rx_handler():

unreferenced object 0xea915000 (size 2048):
  comm "systemd-network", pid 713, jiffies 4294938323 (age 102.180s)
  hex dump (first 32 bytes):
    00 58 91 ea ff ff ff ff ff ff ff ff ff ff ff ff  .X..............
    ff ff ff ff ff ff fd 0f 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<c0108680>] __kmalloc_track_caller+0x1a4/0x230
    [<c0529eb4>] __alloc_skb+0x68/0x16c
    [<c052c884>] __netdev_alloc_skb+0x40/0x104
    [<bf1ad29c>] cpsw_ndo_open+0x374/0x670 [ti_cpsw]
    [<c053c3d4>] __dev_open+0xb0/0x114
    [<c053c690>] __dev_change_flags+0x9c/0x14c
    [<c053c760>] dev_change_flags+0x20/0x50
    [<c054bdcc>] do_setlink+0x2cc/0x78c
    [<c054c358>] rtnl_setlink+0xcc/0x100
    [<c054b34c>] rtnetlink_rcv_msg+0x184/0x224
    [<c056467c>] netlink_rcv_skb+0xa8/0xc4
    [<c054b1c0>] rtnetlink_rcv+0x2c/0x34
    [<c0564018>] netlink_unicast+0x16c/0x1f8
    [<c0564498>] netlink_sendmsg+0x334/0x348
    [<c052015c>] sock_sendmsg+0x1c/0x2c
    [<c05213e0>] SyS_sendto+0xc0/0xe8

unreferenced object 0xec861780 (size 192):
  comm "softirq", pid 0, jiffies 4294938759 (age 109.540s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 b0 5a ed 00 00 00 00 00 00 00 00  ......Z.........
  backtrace:
    [<c0107830>] kmem_cache_alloc+0x190/0x208
    [<c052c768>] __build_skb+0x30/0x98
    [<c052c8fc>] __netdev_alloc_skb+0xb8/0x104
    [<bf1abc54>] cpsw_rx_handler+0x68/0x1e4 [ti_cpsw]
    [<bf11aa30>] __cpdma_chan_free+0xa8/0xc4 [davinci_cpdma]
    [<bf11ab98>] __cpdma_chan_process+0x14c/0x16c [davinci_cpdma]
    [<bf11abfc>] cpdma_chan_process+0x44/0x5c [davinci_cpdma]
    [<bf1adc78>] cpsw_rx_poll+0x1c/0x9c [ti_cpsw]
    [<c0539180>] net_rx_action+0x1f0/0x2ec
    [<c003881c>] __do_softirq+0x134/0x258
    [<c0038a00>] do_softirq+0x68/0x70
    [<c0038adc>] __local_bh_enable_ip+0xd4/0xe8
    [<c0640994>] _raw_spin_unlock_bh+0x30/0x34
    [<c05f4e9c>] igmp6_group_added+0x4c/0x1bc
    [<c05f6600>] ipv6_dev_mc_inc+0x398/0x434
    [<c05dba74>] addrconf_dad_work+0x224/0x39c

This happens because CPSW allocates SK buffers and then passes
pointers on them in CPDMA where they stored in internal CPPI RAM
(SRAM) which belongs to DEV MMIO space. Kmemleak does not scan IO
memory and so reports memory leaks.

Hence, mark allocated sk buffers as false positive explicitly.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index c51f346..f85d605 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -734,6 +734,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
 		netif_receive_skb(skb);
 		ndev->stats.rx_bytes += len;
 		ndev->stats.rx_packets++;
+		kmemleak_not_leak(new_skb);
 	} else {
 		ndev->stats.rx_dropped++;
 		new_skb = skb;
@@ -1325,6 +1326,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
 				kfree_skb(skb);
 				goto err_cleanup;
 			}
+			kmemleak_not_leak(skb);
 		}
 		/* continue even if we didn't manage to submit all
 		 * receive descs
-- 
cgit v0.10.2


From 0d039f337f45c48fb78b80cbf7b706b4de7f07ea Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <zyjzyj2000@gmail.com>
Date: Tue, 9 Aug 2016 21:36:04 +0800
Subject: bonding: fix the typo

The message "803.ad" should be "802.3ad".

Signed-off-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1f276fa..217e8da 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -152,7 +152,7 @@ module_param(lacp_rate, charp, 0);
 MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner; "
 			    "0 for slow, 1 for fast");
 module_param(ad_select, charp, 0);
-MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic; "
+MODULE_PARM_DESC(ad_select, "802.3ad aggregation selection logic; "
 			    "0 for stable (default), 1 for bandwidth, "
 			    "2 for count");
 module_param(min_links, int, 0);
-- 
cgit v0.10.2


From a96d3b7593a3eefab62dd930e5c99201c3678ee4 Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Tue, 9 Aug 2016 18:00:08 +0200
Subject: dm9000: Fix irq trigger type setup on non-dt platforms

Commit b5a099c67a1c36b "net: ethernet: davicom: fix devicetree irq
resource" causes an interrupt storm after the ethernet interface
is activated on S3C24XX platform (ARM non-dt), due to the interrupt
trigger type not being set properly.

It seems, after adding parsing of IRQ flags in commit 7085a7401ba54e92b
"drivers: platform: parse IRQ flags from resources", there is no path
for non-dt platforms where irq_set_type callback could be invoked when
we don't pass the trigger type flags to the request_irq() call.

In case of a board where the regression is seen the interrupt trigger
type flags are passed through a platform device's resource and it is
not currently handled properly without passing the irq trigger type
flags to the request_irq() call.  In case of OF an of_irq_get() call
within platform_get_irq() function seems to be ensuring required irq_chip
setup, but there is no equivalent code for non OF/ACPI platforms.

This patch mostly restores irq trigger type setting code which has been
removed in commit ("net: ethernet: davicom: fix devicetree irq resource").

Fixes: b5a099c67a1c36b913 ("net: ethernet: davicom: fix devicetree irq resource")

Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Acked-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 1471e16..f45385f 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1299,6 +1299,7 @@ static int
 dm9000_open(struct net_device *dev)
 {
 	struct board_info *db = netdev_priv(dev);
+	unsigned int irq_flags = irq_get_trigger_type(dev->irq);
 
 	if (netif_msg_ifup(db))
 		dev_dbg(db->dev, "enabling %s\n", dev->name);
@@ -1306,9 +1307,11 @@ dm9000_open(struct net_device *dev)
 	/* If there is no IRQ type specified, tell the user that this is a
 	 * problem
 	 */
-	if (irq_get_trigger_type(dev->irq) == IRQF_TRIGGER_NONE)
+	if (irq_flags == IRQF_TRIGGER_NONE)
 		dev_warn(db->dev, "WARNING: no IRQ resource flags set.\n");
 
+	irq_flags |= IRQF_SHARED;
+
 	/* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */
 	iow(db, DM9000_GPR, 0);	/* REG_1F bit0 activate phyxcer */
 	mdelay(1); /* delay needs by DM9000B */
@@ -1316,8 +1319,7 @@ dm9000_open(struct net_device *dev)
 	/* Initialize DM9000 board */
 	dm9000_init_dm9000(dev);
 
-	if (request_irq(dev->irq, dm9000_interrupt, IRQF_SHARED,
-			dev->name, dev))
+	if (request_irq(dev->irq, dm9000_interrupt, irq_flags, dev->name, dev))
 		return -EAGAIN;
 	/* Now that we have an interrupt handler hooked up we can unmask
 	 * our interrupts
-- 
cgit v0.10.2


From 836384d2501dee87b1c437f3e268871980c857bf Mon Sep 17 00:00:00 2001
From: Wenyou Yang <wenyou.yang@atmel.com>
Date: Fri, 5 Aug 2016 14:35:41 +0800
Subject: net: phy: micrel: Add specific suspend

Disable all interrupts when suspend, they will be enabled
when resume. Otherwise, the suspend/resume process will be
blocked occasionally.

Signed-off-by: Wenyou Yang <wenyou.yang@atmel.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 1882d98..053e879 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -677,17 +677,28 @@ static void kszphy_get_stats(struct phy_device *phydev,
 		data[i] = kszphy_get_stat(phydev, i);
 }
 
-static int kszphy_resume(struct phy_device *phydev)
+static int kszphy_suspend(struct phy_device *phydev)
 {
-	int value;
+	/* Disable PHY Interrupts */
+	if (phy_interrupt_is_valid(phydev)) {
+		phydev->interrupts = PHY_INTERRUPT_DISABLED;
+		if (phydev->drv->config_intr)
+			phydev->drv->config_intr(phydev);
+	}
 
-	mutex_lock(&phydev->lock);
+	return genphy_suspend(phydev);
+}
 
-	value = phy_read(phydev, MII_BMCR);
-	phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN);
+static int kszphy_resume(struct phy_device *phydev)
+{
+	genphy_resume(phydev);
 
-	kszphy_config_intr(phydev);
-	mutex_unlock(&phydev->lock);
+	/* Enable PHY Interrupts */
+	if (phy_interrupt_is_valid(phydev)) {
+		phydev->interrupts = PHY_INTERRUPT_ENABLED;
+		if (phydev->drv->config_intr)
+			phydev->drv->config_intr(phydev);
+	}
 
 	return 0;
 }
@@ -900,7 +911,7 @@ static struct phy_driver ksphy_driver[] = {
 	.get_sset_count = kszphy_get_sset_count,
 	.get_strings	= kszphy_get_strings,
 	.get_stats	= kszphy_get_stats,
-	.suspend	= genphy_suspend,
+	.suspend	= kszphy_suspend,
 	.resume		= kszphy_resume,
 }, {
 	.phy_id		= PHY_ID_KSZ8061,
-- 
cgit v0.10.2


From 7bb90c3715a496c650b2e879225030f9dd9cfafb Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Thu, 4 Aug 2016 11:11:19 +0900
Subject: bridge: Fix problems around fdb entries pointing to the bridge device

Adding fdb entries pointing to the bridge device uses fdb_insert(),
which lacks various checks and does not respect added_by_user flag.

As a result, some inconsistent behavior can happen:
* Adding temporary entries succeeds but results in permanent entries.
* Same goes for "dynamic" and "use".
* Changing mac address of the bridge device causes deletion of
  user-added entries.
* Replacing existing entries looks successful from userspace but actually
  not, regardless of NLM_F_EXCL flag.

Use the same logic as other entries and fix them.

Fixes: 3741873b4f73 ("bridge: allow adding of fdb entries pointing to the bridge device")
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index c18080a..cd620fa 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -267,7 +267,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
 
 	/* If old entry was unassociated with any port, then delete it. */
 	f = __br_fdb_get(br, br->dev->dev_addr, 0);
-	if (f && f->is_local && !f->dst)
+	if (f && f->is_local && !f->dst && !f->added_by_user)
 		fdb_delete_local(br, NULL, f);
 
 	fdb_insert(br, NULL, newaddr, 0);
@@ -282,7 +282,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
 		if (!br_vlan_should_use(v))
 			continue;
 		f = __br_fdb_get(br, br->dev->dev_addr, v->vid);
-		if (f && f->is_local && !f->dst)
+		if (f && f->is_local && !f->dst && !f->added_by_user)
 			fdb_delete_local(br, NULL, f);
 		fdb_insert(br, NULL, newaddr, v->vid);
 	}
@@ -764,20 +764,25 @@ out:
 }
 
 /* Update (create or replace) forwarding database entry */
-static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
-			 __u16 state, __u16 flags, __u16 vid)
+static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
+			 const __u8 *addr, __u16 state, __u16 flags, __u16 vid)
 {
-	struct net_bridge *br = source->br;
 	struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
 	struct net_bridge_fdb_entry *fdb;
 	bool modified = false;
 
 	/* If the port cannot learn allow only local and static entries */
-	if (!(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
+	if (source && !(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
 	    !(source->state == BR_STATE_LEARNING ||
 	      source->state == BR_STATE_FORWARDING))
 		return -EPERM;
 
+	if (!source && !(state & NUD_PERMANENT)) {
+		pr_info("bridge: RTM_NEWNEIGH %s without NUD_PERMANENT\n",
+			br->dev->name);
+		return -EINVAL;
+	}
+
 	fdb = fdb_find(head, addr, vid);
 	if (fdb == NULL) {
 		if (!(flags & NLM_F_CREATE))
@@ -832,22 +837,28 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
 	return 0;
 }
 
-static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
-	       const unsigned char *addr, u16 nlh_flags, u16 vid)
+static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
+			struct net_bridge_port *p, const unsigned char *addr,
+			u16 nlh_flags, u16 vid)
 {
 	int err = 0;
 
 	if (ndm->ndm_flags & NTF_USE) {
+		if (!p) {
+			pr_info("bridge: RTM_NEWNEIGH %s with NTF_USE is not supported\n",
+				br->dev->name);
+			return -EINVAL;
+		}
 		local_bh_disable();
 		rcu_read_lock();
-		br_fdb_update(p->br, p, addr, vid, true);
+		br_fdb_update(br, p, addr, vid, true);
 		rcu_read_unlock();
 		local_bh_enable();
 	} else {
-		spin_lock_bh(&p->br->hash_lock);
-		err = fdb_add_entry(p, addr, ndm->ndm_state,
+		spin_lock_bh(&br->hash_lock);
+		err = fdb_add_entry(br, p, addr, ndm->ndm_state,
 				    nlh_flags, vid);
-		spin_unlock_bh(&p->br->hash_lock);
+		spin_unlock_bh(&br->hash_lock);
 	}
 
 	return err;
@@ -884,6 +895,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 				dev->name);
 			return -EINVAL;
 		}
+		br = p->br;
 		vg = nbp_vlan_group(p);
 	}
 
@@ -895,15 +907,9 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		}
 
 		/* VID was specified, so use it. */
-		if (dev->priv_flags & IFF_EBRIDGE)
-			err = br_fdb_insert(br, NULL, addr, vid);
-		else
-			err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
+		err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid);
 	} else {
-		if (dev->priv_flags & IFF_EBRIDGE)
-			err = br_fdb_insert(br, NULL, addr, 0);
-		else
-			err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
+		err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0);
 		if (err || !vg || !vg->num_vlans)
 			goto out;
 
@@ -914,11 +920,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		list_for_each_entry(v, &vg->vlan_list, vlist) {
 			if (!br_vlan_should_use(v))
 				continue;
-			if (dev->priv_flags & IFF_EBRIDGE)
-				err = br_fdb_insert(br, NULL, addr, v->vid);
-			else
-				err = __br_fdb_add(ndm, p, addr, nlh_flags,
-						   v->vid);
+			err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid);
 			if (err)
 				goto out;
 		}
-- 
cgit v0.10.2


From 4da449ae1df9cfeb167e78f250b250eff64bc65e Mon Sep 17 00:00:00 2001
From: Laura Garcia Liebana <nevola@gmail.com>
Date: Tue, 9 Aug 2016 20:46:16 +0200
Subject: netfilter: nft_exthdr: Add size check on u8 nft_exthdr attributes

Fix the direct assignment of offset and length attributes included in
nft_exthdr structure from u32 data to u8.

Signed-off-by: Laura Garcia Liebana <nevola@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index ba7aed1..82c264e 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -59,6 +59,7 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
 			   const struct nlattr * const tb[])
 {
 	struct nft_exthdr *priv = nft_expr_priv(expr);
+	u32 offset, len;
 
 	if (tb[NFTA_EXTHDR_DREG] == NULL ||
 	    tb[NFTA_EXTHDR_TYPE] == NULL ||
@@ -66,9 +67,15 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
 	    tb[NFTA_EXTHDR_LEN] == NULL)
 		return -EINVAL;
 
+	offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
+	len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+
+	if (offset > U8_MAX || len > U8_MAX)
+		return -ERANGE;
+
 	priv->type   = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
-	priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
-	priv->len    = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+	priv->offset = offset;
+	priv->len    = len;
 	priv->dreg   = nft_parse_register(tb[NFTA_EXTHDR_DREG]);
 
 	return nft_validate_register_store(ctx, priv->dreg, NULL,
-- 
cgit v0.10.2


From 672ca65d9aa8578f382784fe73578cd499664828 Mon Sep 17 00:00:00 2001
From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Date: Wed, 10 Aug 2016 14:07:34 +0200
Subject: tipc: fix variable dereference before NULL check

In commit cf6f7e1d5109 ("tipc: dump monitor attributes"),
I dereferenced a pointer before checking if its valid.
This is reported by static check Smatch as:
net/tipc/monitor.c:733 tipc_nl_add_monitor_peer()
     warn: variable dereferenced before check 'mon' (see line 731)

In this commit, we check for a valid monitor before proceeding
with any other operation.

Fixes: cf6f7e1d5109 ("tipc: dump monitor attributes")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index b62caa1..ed97a58 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -728,12 +728,13 @@ int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg,
 			     u32 bearer_id, u32 *prev_node)
 {
 	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
-	struct tipc_peer *peer = mon->self;
+	struct tipc_peer *peer;
 
 	if (!mon)
 		return -EINVAL;
 
 	read_lock_bh(&mon->lock);
+	peer = mon->self;
 	do {
 		if (*prev_node) {
 			if (peer->addr == *prev_node)
-- 
cgit v0.10.2


From dafa6b0db2d62164c5ef81a40312d5ba514126b9 Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Wed, 10 Aug 2016 17:48:36 +0200
Subject: net: hns: fix typo in g_gmac_stats_string[]

s/gamc/gmac/

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index 1235c7f..1e1eb92 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -17,7 +17,7 @@ static const struct mac_stats_string g_gmac_stats_string[] = {
 	{"gmac_rx_octets_total_ok", MAC_STATS_FIELD_OFF(rx_good_bytes)},
 	{"gmac_rx_octets_bad", MAC_STATS_FIELD_OFF(rx_bad_bytes)},
 	{"gmac_rx_uc_pkts", MAC_STATS_FIELD_OFF(rx_uc_pkts)},
-	{"gamc_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)},
+	{"gmac_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)},
 	{"gmac_rx_bc_pkts", MAC_STATS_FIELD_OFF(rx_bc_pkts)},
 	{"gmac_rx_pkts_64octets", MAC_STATS_FIELD_OFF(rx_64bytes)},
 	{"gmac_rx_pkts_65to127", MAC_STATS_FIELD_OFF(rx_65to127)},
-- 
cgit v0.10.2


From 4b5b9ba553f9aa5f484ab972fc9b58061885ceca Mon Sep 17 00:00:00 2001
From: Martynas Pumputis <martynas@weave.works>
Date: Tue, 9 Aug 2016 16:24:50 +0100
Subject: openvswitch: do not ignore netdev errors when creating tunnel vports

The creation of a tunnel vport (geneve, gre, vxlan) brings up a
corresponding netdev, a multi-step operation which can fail.

For example, changing a vxlan vport's netdev state to 'up' binds the
vport's socket to a UDP port - if the binding fails (e.g. due to the
port being in use), the error is currently ignored giving the
appearance that the tunnel vport creation completed successfully.

Signed-off-by: Martynas Pumputis <martynas@weave.works>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 1a1fcec..5aaf3ba 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -93,7 +93,14 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms)
 		return ERR_CAST(dev);
 	}
 
-	dev_change_flags(dev, dev->flags | IFF_UP);
+	err = dev_change_flags(dev, dev->flags | IFF_UP);
+	if (err < 0) {
+		rtnl_delete_link(dev);
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		goto error;
+	}
+
 	rtnl_unlock();
 	return vport;
 error:
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 7f8897f..0e72d95 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -54,6 +54,7 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms)
 	struct net *net = ovs_dp_get_net(parms->dp);
 	struct net_device *dev;
 	struct vport *vport;
+	int err;
 
 	vport = ovs_vport_alloc(0, &ovs_gre_vport_ops, parms);
 	if (IS_ERR(vport))
@@ -67,9 +68,15 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms)
 		return ERR_CAST(dev);
 	}
 
-	dev_change_flags(dev, dev->flags | IFF_UP);
-	rtnl_unlock();
+	err = dev_change_flags(dev, dev->flags | IFF_UP);
+	if (err < 0) {
+		rtnl_delete_link(dev);
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		return ERR_PTR(err);
+	}
 
+	rtnl_unlock();
 	return vport;
 }
 
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 5eb7694..7eb955e 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -130,7 +130,14 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
 		return ERR_CAST(dev);
 	}
 
-	dev_change_flags(dev, dev->flags | IFF_UP);
+	err = dev_change_flags(dev, dev->flags | IFF_UP);
+	if (err < 0) {
+		rtnl_delete_link(dev);
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		goto error;
+	}
+
 	rtnl_unlock();
 	return vport;
 error:
-- 
cgit v0.10.2


From 104a493390940e85fb7c840a9fd5214aba5cb3bd Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 11 Aug 2016 18:15:56 +0800
Subject: macvtap: fix use after free for skb_array during release

We've clean skb_array in macvtap_put_queue() but still try to pop from
it during macvtap_sock_destruct(). Fix this use after free by moving
the skb array cleanup to macvtap_sock_destruct() instead.

Fixes: 362899b8725b ("macvtap: switch to use skb array")
Reported-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index a38c0da..070e329 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -275,7 +275,6 @@ static void macvtap_put_queue(struct macvtap_queue *q)
 	rtnl_unlock();
 
 	synchronize_rcu();
-	skb_array_cleanup(&q->skb_array);
 	sock_put(&q->sk);
 }
 
@@ -533,10 +532,8 @@ static void macvtap_sock_write_space(struct sock *sk)
 static void macvtap_sock_destruct(struct sock *sk)
 {
 	struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk);
-	struct sk_buff *skb;
 
-	while ((skb = skb_array_consume(&q->skb_array)) != NULL)
-		kfree_skb(skb);
+	skb_array_cleanup(&q->skb_array);
 }
 
 static int macvtap_open(struct inode *inode, struct file *file)
-- 
cgit v0.10.2


From bbe11fab0b6c1d113776b2898e085bf4d1fdc607 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Thu, 11 Aug 2016 15:24:27 +0200
Subject: macsec: use after free when deleting the underlying device

macsec_notify() loops over the list of macsec devices configured on the
underlying device when this device is being removed.  This list is part
of the rx_handler data.

However, macsec_dellink unregisters the rx_handler and frees the
rx_handler data when the last macsec device is removed from the
underlying device.

Add macsec_common_dellink() to delete macsec devices without
unregistering the rx_handler and freeing the associated data.

Fixes: 960d5848dbf1 ("macsec: fix memory leaks around rx_handler (un)registration")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index d13e6e1..dbd590a 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -3047,22 +3047,29 @@ static void macsec_del_dev(struct macsec_dev *macsec)
 	}
 }
 
+static void macsec_common_dellink(struct net_device *dev, struct list_head *head)
+{
+	struct macsec_dev *macsec = macsec_priv(dev);
+
+	unregister_netdevice_queue(dev, head);
+	list_del_rcu(&macsec->secys);
+	macsec_del_dev(macsec);
+
+	macsec_generation++;
+}
+
 static void macsec_dellink(struct net_device *dev, struct list_head *head)
 {
 	struct macsec_dev *macsec = macsec_priv(dev);
 	struct net_device *real_dev = macsec->real_dev;
 	struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev);
 
-	macsec_generation++;
+	macsec_common_dellink(dev, head);
 
-	unregister_netdevice_queue(dev, head);
-	list_del_rcu(&macsec->secys);
 	if (list_empty(&rxd->secys)) {
 		netdev_rx_handler_unregister(real_dev);
 		kfree(rxd);
 	}
-
-	macsec_del_dev(macsec);
 }
 
 static int register_macsec_dev(struct net_device *real_dev,
@@ -3382,8 +3389,12 @@ static int macsec_notify(struct notifier_block *this, unsigned long event,
 
 		rxd = macsec_data_rtnl(real_dev);
 		list_for_each_entry_safe(m, n, &rxd->secys, secys) {
-			macsec_dellink(m->secy.netdev, &head);
+			macsec_common_dellink(m->secy.netdev, &head);
 		}
+
+		netdev_rx_handler_unregister(real_dev);
+		kfree(rxd);
+
 		unregister_netdevice_many(&head);
 		break;
 	}
-- 
cgit v0.10.2


From 601bbae0bc10d4306857b93d84240b039b3d9a6c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 10 Aug 2016 23:54:08 +0200
Subject: dsa: mv88e6xxx: hide unused functions

When CONFIG_NET_DSA_HWMON is disabled, we get warnings about two unused
functions whose only callers are all inside of an #ifdef:

drivers/net/dsa/mv88e6xxx.c:3257:12: 'mv88e6xxx_mdio_page_write' defined but not used [-Werror=unused-function]
drivers/net/dsa/mv88e6xxx.c:3244:12: 'mv88e6xxx_mdio_page_read' defined but not used [-Werror=unused-function]

This adds another ifdef around the function definitions. The warnings
appeared after the functions were marked 'static', but the problem
was already there before that.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 57d3231057e9 ("net: dsa: mv88e6xxx: fix style issues")
Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index d36aedd..d1d9d3c 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3187,6 +3187,7 @@ static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr)
 	return err;
 }
 
+#ifdef CONFIG_NET_DSA_HWMON
 static int mv88e6xxx_mdio_page_read(struct dsa_switch *ds, int port, int page,
 				    int reg)
 {
@@ -3212,6 +3213,7 @@ static int mv88e6xxx_mdio_page_write(struct dsa_switch *ds, int port, int page,
 
 	return ret;
 }
+#endif
 
 static int mv88e6xxx_port_to_mdio_addr(struct mv88e6xxx_chip *chip, int port)
 {
-- 
cgit v0.10.2


From 747ea55e4f78fd980350c39570a986b8c1c3e4aa Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 12 Aug 2016 22:17:17 +0200
Subject: bpf: fix bpf_skb_in_cgroup helper naming

While hashing out BPF's current_task_under_cgroup helper bits, it came
to discussion that the skb_in_cgroup helper name was suboptimally chosen.

Tejun says:

  So, I think in_cgroup should mean that the object is in that
  particular cgroup while under_cgroup in the subhierarchy of that
  cgroup. Let's rename the other subhierarchy test to under too. I
  think that'd be a lot less confusing going forward.

  [...]

  It's more intuitive and gives us the room to implement the real
  "in" test if ever necessary in the future.

Since this touches uapi bits, we need to change this as long as v4.8
is not yet officially released. Thus, change the helper enum and rename
related bits.

Fixes: 4a482f34afcc ("cgroup: bpf: Add bpf_skb_in_cgroup_proto")
Reference: http://patchwork.ozlabs.org/patch/658500/
Suggested-by: Sargun Dhillon <sargun@sargun.me>
Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index da218fe..9e5fc16 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -339,7 +339,7 @@ enum bpf_func_id {
 	BPF_FUNC_skb_change_type,
 
 	/**
-	 * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+	 * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb
 	 * @skb: pointer to skb
 	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
 	 * @index: index of the cgroup in the bpf_map
@@ -348,7 +348,7 @@ enum bpf_func_id {
 	 *   == 1 skb succeeded the cgroup2 descendant test
 	 *    < 0 error
 	 */
-	BPF_FUNC_skb_in_cgroup,
+	BPF_FUNC_skb_under_cgroup,
 
 	/**
 	 * bpf_get_hash_recalc(skb)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7094c69..daea765 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1053,7 +1053,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_CGROUP_ARRAY:
-		if (func_id != BPF_FUNC_skb_in_cgroup)
+		if (func_id != BPF_FUNC_skb_under_cgroup)
 			goto error;
 		break;
 	default:
@@ -1075,7 +1075,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
 			goto error;
 		break;
-	case BPF_FUNC_skb_in_cgroup:
+	case BPF_FUNC_skb_under_cgroup:
 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
 			goto error;
 		break;
diff --git a/net/core/filter.c b/net/core/filter.c
index b5add4e..bd9bf2e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2317,7 +2317,7 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
 }
 
 #ifdef CONFIG_SOCK_CGROUP_DATA
-static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 {
 	struct sk_buff *skb = (struct sk_buff *)(long)r1;
 	struct bpf_map *map = (struct bpf_map *)(long)r2;
@@ -2340,8 +2340,8 @@ static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 	return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
 }
 
-static const struct bpf_func_proto bpf_skb_in_cgroup_proto = {
-	.func		= bpf_skb_in_cgroup,
+static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
+	.func		= bpf_skb_under_cgroup,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
@@ -2421,8 +2421,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_get_smp_processor_id:
 		return &bpf_get_smp_processor_id_proto;
 #ifdef CONFIG_SOCK_CGROUP_DATA
-	case BPF_FUNC_skb_in_cgroup:
-		return &bpf_skb_in_cgroup_proto;
+	case BPF_FUNC_skb_under_cgroup:
+		return &bpf_skb_under_cgroup_proto;
 #endif
 	default:
 		return sk_filter_func_proto(func_id);
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 217c8d5..7927a09 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -72,8 +72,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
 	(void *) BPF_FUNC_l3_csum_replace;
 static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
 	(void *) BPF_FUNC_l4_csum_replace;
-static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) =
-	(void *) BPF_FUNC_skb_in_cgroup;
+static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
+	(void *) BPF_FUNC_skb_under_cgroup;
 
 #if defined(__x86_64__)
 
diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c
index 2732c37..10ff734 100644
--- a/samples/bpf/test_cgrp2_tc_kern.c
+++ b/samples/bpf/test_cgrp2_tc_kern.c
@@ -57,7 +57,7 @@ int handle_egress(struct __sk_buff *skb)
 		bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg),
 				 eth->h_proto, ip6h->nexthdr);
 		return TC_ACT_OK;
-	} else if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) {
+	} else if (bpf_skb_under_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) {
 		bpf_trace_printk(pass_msg, sizeof(pass_msg));
 		return TC_ACT_OK;
 	} else {
-- 
cgit v0.10.2


From b4c0e0c61f81dedc82dda35c287ea149ff98b434 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 11 Aug 2016 18:17:22 +0100
Subject: calipso: fix resource leak on calipso_genopt failure

Currently, if calipso_genopt fails then the error exit path
does not free the ipv6_opt_hdr new causing a memory leak. Fix
this by kfree'ing new on the error exit path.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index c53b92c..37ac9de 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -952,8 +952,10 @@ calipso_opt_insert(struct ipv6_opt_hdr *hop,
 		memcpy(new, hop, start);
 	ret_val = calipso_genopt((unsigned char *)new, start, buf_len, doi_def,
 				 secattr);
-	if (ret_val < 0)
+	if (ret_val < 0) {
+		kfree(new);
 		return ERR_PTR(ret_val);
+	}
 
 	buf_len = start + ret_val;
 	/* At this point buf_len aligns to 4n, so (buf_len & 4) pads to 8n */
-- 
cgit v0.10.2


From d7005652cd31dfc5660e1e32bf7e53538ef14987 Mon Sep 17 00:00:00 2001
From: "sean.wang@mediatek.com" <sean.wang@mediatek.com>
Date: Sat, 13 Aug 2016 19:16:18 +0800
Subject: net: ethernet: mediatek: fixed that initializing u64_stats_sync is
 missing

To fix runtime warning with lockdep is enabled due that u64_stats_sync
is not initialized well, so add it.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b57ae3a..fe17f8c 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1751,6 +1751,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 		goto free_netdev;
 	}
 	spin_lock_init(&mac->hw_stats->stats_lock);
+	u64_stats_init(&mac->hw_stats->syncp);
 	mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
 
 	SET_NETDEV_DEV(eth->netdev[id], eth->dev);
-- 
cgit v0.10.2


From e8c2993a4c9fdb0c9e6fc983edd5b52716ce7442 Mon Sep 17 00:00:00 2001
From: "sean.wang@mediatek.com" <sean.wang@mediatek.com>
Date: Sat, 13 Aug 2016 19:16:19 +0800
Subject: net: ethernet: mediatek: add the missing of_node_put() after node is
 used done

This patch adds the missing of_node_put() after finishing the usage
of of_parse_phandle() or of_node_get() used by fixed_phy.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index fe17f8c..0030361 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -269,6 +269,8 @@ static int mtk_phy_connect(struct mtk_mac *mac)
 				    ADVERTISED_Autoneg;
 	phy_start_aneg(mac->phy_dev);
 
+	of_node_put(np);
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 0ed661d5a48fa6df0b50ae64d27fe759a3ce42cf Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 11 Aug 2016 21:38:37 +0200
Subject: bpf: fix write helpers with regards to non-linear parts

Fix the bpf_try_make_writable() helper and all call sites we have in BPF,
it's currently defect with regards to skbs when the write_len spans into
non-linear parts, no matter if cloned or not.

There are multiple issues at once. First, using skb_store_bits() is not
correct since even if we have a cloned skb, page frags can still be shared.
To really make them private, we need to pull them in via __pskb_pull_tail()
first, which also gets us a private head via pskb_expand_head() implicitly.

This is for helpers like bpf_skb_store_bytes(), bpf_l3_csum_replace(),
bpf_l4_csum_replace(). Really, the only thing reasonable and working here
is to call skb_ensure_writable() before any write operation. Meaning, via
pskb_may_pull() it makes sure that parts we want to access are pulled in and
if not does so plus unclones the skb implicitly. If our write_len still fits
the headlen and we're cloned and our header of the clone is not writable,
then we need to make a private copy via pskb_expand_head(). skb_store_bits()
is a bit misleading and only safe to store into non-linear data in different
contexts such as 357b40a18b04 ("[IPV6]: IPV6_CHECKSUM socket option can
corrupt kernel memory").

For above BPF helper functions, it means after fixed bpf_try_make_writable(),
we've pulled in enough, so that we operate always based on skb->data. Thus,
the call to skb_header_pointer() and skb_store_bits() becomes superfluous.
In bpf_skb_store_bytes(), the len check is unnecessary too since it can
only pass in maximum of BPF stack size, so adding offset is guaranteed to
never overflow. Also bpf_l3/4_csum_replace() helpers must test for proper
offset alignment since they use __sum16 pointer for writing resulting csum.

The remaining helpers that change skb data not discussed here yet are
bpf_skb_vlan_push(), bpf_skb_vlan_pop() and bpf_skb_change_proto(). The
vlan helpers internally call either skb_ensure_writable() (pop case) and
skb_cow_head() (push case, for head expansion), respectively. Similarly,
bpf_skb_proto_xlat() takes care to not mangle page frags.

Fixes: 608cd71a9c7c ("tc: bpf: generalize pedit action")
Fixes: 91bc4822c3d6 ("tc: bpf: add checksum helpers")
Fixes: 3697649ff29e ("bpf: try harder on clones when writing into skb")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/filter.c b/net/core/filter.c
index bd9bf2e..cb06ace 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1355,13 +1355,9 @@ static inline int bpf_try_make_writable(struct sk_buff *skb,
 {
 	int err;
 
-	if (!skb_cloned(skb))
-		return 0;
-	if (skb_clone_writable(skb, write_len))
-		return 0;
-	err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-	if (!err)
-		bpf_compute_data_end(skb);
+	err = skb_ensure_writable(skb, write_len);
+	bpf_compute_data_end(skb);
+
 	return err;
 }
 
@@ -1379,42 +1375,25 @@ static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
 
 static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 {
-	struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
-	int offset = (int) r2;
+	unsigned int offset = (unsigned int) r2;
 	void *from = (void *) (long) r3;
 	unsigned int len = (unsigned int) r4;
 	void *ptr;
 
 	if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
 		return -EINVAL;
-
-	/* bpf verifier guarantees that:
-	 * 'from' pointer points to bpf program stack
-	 * 'len' bytes of it were initialized
-	 * 'len' > 0
-	 * 'skb' is a valid pointer to 'struct sk_buff'
-	 *
-	 * so check for invalid 'offset' and too large 'len'
-	 */
-	if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
+	if (unlikely(offset > 0xffff))
 		return -EFAULT;
 	if (unlikely(bpf_try_make_writable(skb, offset + len)))
 		return -EFAULT;
 
-	ptr = skb_header_pointer(skb, offset, len, sp->buff);
-	if (unlikely(!ptr))
-		return -EFAULT;
-
+	ptr = skb->data + offset;
 	if (flags & BPF_F_RECOMPUTE_CSUM)
 		__skb_postpull_rcsum(skb, ptr, len, offset);
 
 	memcpy(ptr, from, len);
 
-	if (ptr == sp->buff)
-		/* skb_store_bits cannot return -EFAULT here */
-		skb_store_bits(skb, offset, ptr, len);
-
 	if (flags & BPF_F_RECOMPUTE_CSUM)
 		__skb_postpush_rcsum(skb, ptr, len, offset);
 	if (flags & BPF_F_INVALIDATE_HASH)
@@ -1437,12 +1416,12 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
 static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 {
 	const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1;
-	int offset = (int) r2;
+	unsigned int offset = (unsigned int) r2;
 	void *to = (void *)(unsigned long) r3;
 	unsigned int len = (unsigned int) r4;
 	void *ptr;
 
-	if (unlikely((u32) offset > 0xffff))
+	if (unlikely(offset > 0xffff))
 		goto err_clear;
 
 	ptr = skb_header_pointer(skb, offset, len, to);
@@ -1470,20 +1449,17 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
 static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 {
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
-	int offset = (int) r2;
-	__sum16 sum, *ptr;
+	unsigned int offset = (unsigned int) r2;
+	__sum16 *ptr;
 
 	if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
 		return -EINVAL;
-	if (unlikely((u32) offset > 0xffff))
-		return -EFAULT;
-	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
+	if (unlikely(offset > 0xffff || offset & 1))
 		return -EFAULT;
-
-	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
-	if (unlikely(!ptr))
+	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
 		return -EFAULT;
 
+	ptr = (__sum16 *)(skb->data + offset);
 	switch (flags & BPF_F_HDR_FIELD_MASK) {
 	case 0:
 		if (unlikely(from != 0))
@@ -1501,10 +1477,6 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 		return -EINVAL;
 	}
 
-	if (ptr == &sum)
-		/* skb_store_bits guaranteed to not return -EFAULT here */
-		skb_store_bits(skb, offset, ptr, sizeof(sum));
-
 	return 0;
 }
 
@@ -1524,20 +1496,18 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
 	bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
 	bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
-	int offset = (int) r2;
-	__sum16 sum, *ptr;
+	unsigned int offset = (unsigned int) r2;
+	__sum16 *ptr;
 
 	if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
 			       BPF_F_HDR_FIELD_MASK)))
 		return -EINVAL;
-	if (unlikely((u32) offset > 0xffff))
+	if (unlikely(offset > 0xffff || offset & 1))
 		return -EFAULT;
-	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
+	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
 		return -EFAULT;
 
-	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
-	if (unlikely(!ptr))
-		return -EFAULT;
+	ptr = (__sum16 *)(skb->data + offset);
 	if (is_mmzero && !*ptr)
 		return 0;
 
@@ -1560,10 +1530,6 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 
 	if (is_mmzero && !*ptr)
 		*ptr = CSUM_MANGLED_0;
-	if (ptr == &sum)
-		/* skb_store_bits guaranteed to not return -EFAULT here */
-		skb_store_bits(skb, offset, ptr, sizeof(sum));
-
 	return 0;
 }
 
-- 
cgit v0.10.2


From c15c0ab12fd62f2b19181d05c62d24bc9fa55a42 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 12 Aug 2016 07:48:21 +0200
Subject: ipv6: suppress sparse warnings in IP6_ECN_set_ce()

Pass the correct type __wsum to csum_sub() and csum_add(). This doesn't
really change anything since __wsum really *is* __be32, but removes the
address space warnings from sparse.

Cc: Eric Dumazet <edumazet@google.com>
Fixes: 34ae6a1aa054 ("ipv6: update skb->csum when CE mark is propagated")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index 0dc0a51..dce2d58 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -128,7 +128,8 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
 	to = from | htonl(INET_ECN_CE << 20);
 	*(__be32 *)iph = to;
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
-		skb->csum = csum_add(csum_sub(skb->csum, from), to);
+		skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
+				     (__force __wsum)to);
 	return 1;
 }
 
-- 
cgit v0.10.2


From 5ba092efc7ddff040777ae7162f1d195f513571b Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@oracle.com>
Date: Fri, 12 Aug 2016 10:29:13 +0200
Subject: net/irda: handle iriap_register_lsap() allocation failure

If iriap_register_lsap() fails to allocate memory, self->lsap is
set to NULL. However, none of the callers handle the failure and
irlmp_connect_request() will happily dereference it:

    iriap_register_lsap: Unable to allocated LSAP!
    ================================================================================
    UBSAN: Undefined behaviour in net/irda/irlmp.c:378:2
    member access within null pointer of type 'struct lsap_cb'
    CPU: 1 PID: 15403 Comm: trinity-c0 Not tainted 4.8.0-rc1+ #81
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org
    04/01/2014
     0000000000000000 ffff88010c7e78a8 ffffffff82344f40 0000000041b58ab3
     ffffffff84f98000 ffffffff82344e94 ffff88010c7e78d0 ffff88010c7e7880
     ffff88010630ad00 ffffffff84a5fae0 ffffffff84d3f5c0 000000000000017a
    Call Trace:
     [<ffffffff82344f40>] dump_stack+0xac/0xfc
     [<ffffffff8242f5a8>] ubsan_epilogue+0xd/0x8a
     [<ffffffff824302bf>] __ubsan_handle_type_mismatch+0x157/0x411
     [<ffffffff83b7bdbc>] irlmp_connect_request+0x7ac/0x970
     [<ffffffff83b77cc0>] iriap_connect_request+0xa0/0x160
     [<ffffffff83b77f48>] state_s_disconnect+0x88/0xd0
     [<ffffffff83b78904>] iriap_do_client_event+0x94/0x120
     [<ffffffff83b77710>] iriap_getvaluebyclass_request+0x3e0/0x6d0
     [<ffffffff83ba6ebb>] irda_find_lsap_sel+0x1eb/0x630
     [<ffffffff83ba90c8>] irda_connect+0x828/0x12d0
     [<ffffffff833c0dfb>] SYSC_connect+0x22b/0x340
     [<ffffffff833c7e09>] SyS_connect+0x9/0x10
     [<ffffffff81007bd3>] do_syscall_64+0x1b3/0x4b0
     [<ffffffff845f946a>] entry_SYSCALL64_slow_path+0x25/0x25
    ================================================================================

The bug seems to have been around since forever.

There's more problems with missing error checks in iriap_init() (and
indeed all of irda_init()), but that's a bigger problem that needs
very careful review and testing. This patch will fix the most serious
bug (as it's easily reached from unprivileged userspace).

I have tested my patch with a reproducer.

Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 4a7ae32a..1138eaf 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -185,8 +185,12 @@ struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv,
 
 	self->magic = IAS_MAGIC;
 	self->mode = mode;
-	if (mode == IAS_CLIENT)
-		iriap_register_lsap(self, slsap_sel, mode);
+	if (mode == IAS_CLIENT) {
+		if (iriap_register_lsap(self, slsap_sel, mode)) {
+			kfree(self);
+			return NULL;
+		}
+	}
 
 	self->confirm = callback;
 	self->priv = priv;
-- 
cgit v0.10.2


From 54236ab09e9696a27baaae693c288920a26e8588 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@oracle.com>
Date: Fri, 12 Aug 2016 09:50:51 +0200
Subject: net/sctp: always initialise sctp_ht_iter::start_fail

sctp_transport_seq_start() does not currently clear iter->start_fail on
success, but relies on it being zero when it is allocated (by
seq_open_net()).

This can be a problem in the following sequence:

    open() // allocates iter (and implicitly sets iter->start_fail = 0)
    read()
     - iter->start() // fails and sets iter->start_fail = 1
     - iter->stop() // doesn't call sctp_transport_walk_stop() (correct)
    read() again
     - iter->start() // succeeds, but doesn't change iter->start_fail
     - iter->stop() // doesn't call sctp_transport_walk_stop() (wrong)

We should initialize sctp_ht_iter::start_fail to zero if ->start()
succeeds, otherwise it's possible that we leave an old value of 1 there,
which will cause ->stop() to not call sctp_transport_walk_stop(), which
causes all sorts of problems like not calling rcu_read_unlock() (and
preempt_enable()), eventually leading to more warnings like this:

    BUG: sleeping function called from invalid context at mm/slab.h:388
    in_atomic(): 0, irqs_disabled(): 0, pid: 16551, name: trinity-c2
    Preemption disabled at:[<ffffffff819bceb6>] rhashtable_walk_start+0x46/0x150

     [<ffffffff81149abb>] preempt_count_add+0x1fb/0x280
     [<ffffffff83295892>] _raw_spin_lock+0x12/0x40
     [<ffffffff819bceb6>] rhashtable_walk_start+0x46/0x150
     [<ffffffff82ec665f>] sctp_transport_walk_start+0x2f/0x60
     [<ffffffff82edda1d>] sctp_transport_seq_start+0x4d/0x150
     [<ffffffff81439e50>] traverse+0x170/0x850
     [<ffffffff8143aeec>] seq_read+0x7cc/0x1180
     [<ffffffff814f996c>] proc_reg_read+0xbc/0x180
     [<ffffffff813d0384>] do_loop_readv_writev+0x134/0x210
     [<ffffffff813d2a95>] do_readv_writev+0x565/0x660
     [<ffffffff813d6857>] vfs_readv+0x67/0xa0
     [<ffffffff813d6c16>] do_preadv+0x126/0x170
     [<ffffffff813d710c>] SyS_preadv+0xc/0x10
     [<ffffffff8100334c>] do_syscall_64+0x19c/0x410
     [<ffffffff83296225>] return_from_SYSCALL_64+0x0/0x6a
     [<ffffffffffffffff>] 0xffffffffffffffff

Notice that this is a subtly different stacktrace from the one in commit
5fc382d875 ("net/sctp: terminate rhashtable walk correctly").

Cc: Xin Long <lucien.xin@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Acked-By: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 4cb5aed..ef8ba77 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -293,6 +293,7 @@ static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos)
 		return ERR_PTR(err);
 	}
 
+	iter->start_fail = 0;
 	return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos);
 }
 
-- 
cgit v0.10.2


From bc561632dddd5af0c4444d919f01cbf6d553aa0a Mon Sep 17 00:00:00 2001
From: Mike Manning <mmanning@brocade.com>
Date: Fri, 12 Aug 2016 12:02:38 +0100
Subject: net: ipv6: Do not keep IPv6 addresses when IPv6 is disabled

If IPv6 is disabled when the option is set to keep IPv6
addresses on link down, userspace is unaware of this as
there is no such indication via netlink. The solution is to
remove the IPv6 addresses in this case, which results in
netlink messages indicating removal of addresses in the
usual manner. This fix also makes the behavior consistent
with the case of having IPv6 disabled first, which stops
IPv6 addresses from being added.

Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional")
Signed-off-by: Mike Manning <mmanning@brocade.com>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ab3e796..df8425f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3543,7 +3543,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	/* combine the user config with event to determine if permanent
 	 * addresses are to be removed from address hash table
 	 */
-	keep_addr = !(how || _keep_addr <= 0);
+	keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6);
 
 	/* Step 2: clear hash table */
 	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
@@ -3599,7 +3599,7 @@ restart:
 	/* re-combine the user config with event to determine if permanent
 	 * addresses are to be removed from the interface list
 	 */
-	keep_addr = (!how && _keep_addr > 0);
+	keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
 
 	INIT_LIST_HEAD(&del_list);
 	list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
-- 
cgit v0.10.2


From e20038724552cd05e351cd7d7526d646953d26b7 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 12 Aug 2016 16:10:32 +0200
Subject: macsec: fix lockdep splats when nesting devices

Currently, trying to setup a vlan over a macsec device, or other
combinations of devices, triggers a lockdep warning.

Use netdev_lockdep_set_classes and ndo_get_lock_subclass, similar to
what macvlan does.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index dbd590a..2043e8c 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -270,6 +270,7 @@ struct macsec_dev {
 	struct pcpu_secy_stats __percpu *stats;
 	struct list_head secys;
 	struct gro_cells gro_cells;
+	unsigned int nest_level;
 };
 
 /**
@@ -2699,6 +2700,8 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
 
 #define MACSEC_FEATURES \
 	(NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST)
+static struct lock_class_key macsec_netdev_addr_lock_key;
+
 static int macsec_dev_init(struct net_device *dev)
 {
 	struct macsec_dev *macsec = macsec_priv(dev);
@@ -2910,6 +2913,13 @@ static int macsec_get_iflink(const struct net_device *dev)
 	return macsec_priv(dev)->real_dev->ifindex;
 }
 
+
+static int macsec_get_nest_level(struct net_device *dev)
+{
+	return macsec_priv(dev)->nest_level;
+}
+
+
 static const struct net_device_ops macsec_netdev_ops = {
 	.ndo_init		= macsec_dev_init,
 	.ndo_uninit		= macsec_dev_uninit,
@@ -2923,6 +2933,7 @@ static const struct net_device_ops macsec_netdev_ops = {
 	.ndo_start_xmit		= macsec_start_xmit,
 	.ndo_get_stats64	= macsec_get_stats64,
 	.ndo_get_iflink		= macsec_get_iflink,
+	.ndo_get_lock_subclass  = macsec_get_nest_level,
 };
 
 static const struct device_type macsec_type = {
@@ -3050,10 +3061,12 @@ static void macsec_del_dev(struct macsec_dev *macsec)
 static void macsec_common_dellink(struct net_device *dev, struct list_head *head)
 {
 	struct macsec_dev *macsec = macsec_priv(dev);
+	struct net_device *real_dev = macsec->real_dev;
 
 	unregister_netdevice_queue(dev, head);
 	list_del_rcu(&macsec->secys);
 	macsec_del_dev(macsec);
+	netdev_upper_dev_unlink(real_dev, dev);
 
 	macsec_generation++;
 }
@@ -3188,6 +3201,16 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
 
 	dev_hold(real_dev);
 
+	macsec->nest_level = dev_get_nest_level(real_dev, netif_is_macsec) + 1;
+	netdev_lockdep_set_classes(dev);
+	lockdep_set_class_and_subclass(&dev->addr_list_lock,
+				       &macsec_netdev_addr_lock_key,
+				       macsec_get_nest_level(dev));
+
+	err = netdev_upper_dev_link(real_dev, dev);
+	if (err < 0)
+		goto unregister;
+
 	/* need to be already registered so that ->init has run and
 	 * the MAC addr is set
 	 */
@@ -3200,12 +3223,12 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
 
 	if (rx_handler && sci_exists(real_dev, sci)) {
 		err = -EBUSY;
-		goto unregister;
+		goto unlink;
 	}
 
 	err = macsec_add_dev(dev, sci, icv_len);
 	if (err)
-		goto unregister;
+		goto unlink;
 
 	if (data)
 		macsec_changelink_common(dev, data);
@@ -3220,6 +3243,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
 
 del_dev:
 	macsec_del_dev(macsec);
+unlink:
+	netdev_upper_dev_unlink(real_dev, dev);
 unregister:
 	unregister_netdevice(dev);
 	return err;
-- 
cgit v0.10.2


From 952fcfd08c8109951622579d0ae7b9cd6cafd688 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 12 Aug 2016 16:10:33 +0200
Subject: net: remove type_check from dev_get_nest_level()

The idea for type_check in dev_get_nest_level() was to count the number
of nested devices of the same type (currently, only macvlan or vlan
devices).
This prevented the false positive lockdep warning on configurations such
as:

eth0 <--- macvlan0 <--- vlan0 <--- macvlan1

However, this doesn't prevent a warning on a configuration such as:

eth0 <--- macvlan0 <--- vlan0
eth1 <--- vlan1 <--- macvlan1

In this case, all the locks end up with a nesting subclass of 1, so
lockdep thinks that there is still a deadlock:

- in the first case we have (macvlan_netdev_addr_lock_key, 1) and then
  take (vlan_netdev_xmit_lock_key, 1)
- in the second case, we have (vlan_netdev_xmit_lock_key, 1) and then
  take (macvlan_netdev_addr_lock_key, 1)

By removing the linktype check in dev_get_nest_level() and always
incrementing the nesting depth, lockdep considers this configuration
valid.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 2043e8c..351e701 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -3201,7 +3201,7 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
 
 	dev_hold(real_dev);
 
-	macsec->nest_level = dev_get_nest_level(real_dev, netif_is_macsec) + 1;
+	macsec->nest_level = dev_get_nest_level(real_dev) + 1;
 	netdev_lockdep_set_classes(dev);
 	lockdep_set_class_and_subclass(&dev->addr_list_lock,
 				       &macsec_netdev_addr_lock_key,
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index cd9b538..3234fcd 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1315,7 +1315,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	vlan->dev      = dev;
 	vlan->port     = port;
 	vlan->set_features = MACVLAN_FEATURES;
-	vlan->nest_level = dev_get_nest_level(lowerdev, netif_is_macvlan) + 1;
+	vlan->nest_level = dev_get_nest_level(lowerdev) + 1;
 
 	vlan->mode     = MACVLAN_MODE_VEPA;
 	if (data && data[IFLA_MACVLAN_MODE])
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 076df53..3a788bf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3891,8 +3891,7 @@ void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
 extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly;
 void netdev_rss_key_fill(void *buffer, size_t len);
 
-int dev_get_nest_level(struct net_device *dev,
-		       bool (*type_check)(const struct net_device *dev));
+int dev_get_nest_level(struct net_device *dev);
 int skb_checksum_help(struct sk_buff *skb);
 struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 				  netdev_features_t features, bool tx_path);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 82a116b..8de138d 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -169,7 +169,7 @@ int register_vlan_dev(struct net_device *dev)
 	if (err < 0)
 		goto out_uninit_mvrp;
 
-	vlan->nest_level = dev_get_nest_level(real_dev, is_vlan_dev) + 1;
+	vlan->nest_level = dev_get_nest_level(real_dev) + 1;
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto out_uninit_mvrp;
diff --git a/net/core/dev.c b/net/core/dev.c
index 4ce07dc..dd6ce59 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6045,8 +6045,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
 EXPORT_SYMBOL(netdev_lower_dev_get_private);
 
 
-int dev_get_nest_level(struct net_device *dev,
-		       bool (*type_check)(const struct net_device *dev))
+int dev_get_nest_level(struct net_device *dev)
 {
 	struct net_device *lower = NULL;
 	struct list_head *iter;
@@ -6056,15 +6055,12 @@ int dev_get_nest_level(struct net_device *dev,
 	ASSERT_RTNL();
 
 	netdev_for_each_lower_dev(dev, lower, iter) {
-		nest = dev_get_nest_level(lower, type_check);
+		nest = dev_get_nest_level(lower);
 		if (max_nest < nest)
 			max_nest = nest;
 	}
 
-	if (type_check(dev))
-		max_nest++;
-
-	return max_nest;
+	return max_nest + 1;
 }
 EXPORT_SYMBOL(dev_get_nest_level);
 
-- 
cgit v0.10.2


From 4cf0b354d92ee2c642532ee39e330f8f580fd985 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 12 Aug 2016 12:03:52 +0200
Subject: rhashtable: avoid large lock-array allocations

Sander reports following splat after netfilter nat bysrc table got
converted to rhashtable:

swapper/0: page allocation failure: order:3, mode:0x2084020(GFP_ATOMIC|__GFP_COMP)
 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.8.0-rc1 [..]
 [<ffffffff811633ed>] warn_alloc_failed+0xdd/0x140
 [<ffffffff811638b1>] __alloc_pages_nodemask+0x3e1/0xcf0
 [<ffffffff811a72ed>] alloc_pages_current+0x8d/0x110
 [<ffffffff8117cb7f>] kmalloc_order+0x1f/0x70
 [<ffffffff811aec19>] __kmalloc+0x129/0x140
 [<ffffffff8146d561>] bucket_table_alloc+0xc1/0x1d0
 [<ffffffff8146da1d>] rhashtable_insert_rehash+0x5d/0xe0
 [<ffffffff819fcfff>] nf_nat_setup_info+0x2ef/0x400

The failure happens when allocating the spinlock array.
Even with GFP_KERNEL its unlikely for such a large allocation
to succeed.

Thomas Graf pointed me at inet_ehash_locks_alloc(), so in addition
to adding NOWARN for atomic allocations this also makes the bucket-array
sizing more conservative.

In commit 095dc8e0c3686 ("tcp: fix/cleanup inet_ehash_locks_alloc()"),
Eric Dumazet says: "Budget 2 cache lines per cpu worth of 'spinlocks'".
IOW, consider size needed by a single spinlock when determining
number of locks per cpu.  So with 64 byte per cacheline and 4 byte per
spinlock this gives 32 locks per cpu.

Resulting size of the lock-array (sizeof(spinlock) == 4):

cpus:    1   2   4   8   16   32   64
old:    1k  1k  4k  8k  16k  16k  16k
new:   128 256 512  1k   2k   4k   8k

8k allocation should have decent chance of success even
with GFP_ATOMIC, and should not fail with GFP_KERNEL.

With 72-byte spinlock (LOCKDEP):
cpus :   1   2
old:    9k 18k
new:   ~2k ~4k

Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Suggested-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 5d845ff..42acd81 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -30,7 +30,7 @@
 
 #define HASH_DEFAULT_SIZE	64UL
 #define HASH_MIN_SIZE		4U
-#define BUCKET_LOCKS_PER_CPU   128UL
+#define BUCKET_LOCKS_PER_CPU	32UL
 
 static u32 head_hashfn(struct rhashtable *ht,
 		       const struct bucket_table *tbl,
@@ -70,7 +70,7 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
 	unsigned int nr_pcpus = num_possible_cpus();
 #endif
 
-	nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
+	nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
 	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
 
 	/* Never allocate more than 0.5 locks per bucket */
@@ -83,6 +83,9 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
 			tbl->locks = vmalloc(size * sizeof(spinlock_t));
 		else
 #endif
+		if (gfp != GFP_KERNEL)
+			gfp |= __GFP_NOWARN | __GFP_NORETRY;
+
 		tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
 					   gfp);
 		if (!tbl->locks)
-- 
cgit v0.10.2


From eb8fc32354aa77678dc6e7950a8f0c79cace204f Mon Sep 17 00:00:00 2001
From: Vincent <vincent.stehle@laposte.net>
Date: Sun, 14 Aug 2016 15:38:29 +0200
Subject: mlxsw: spectrum_router: Fix use after free
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In mlxsw_sp_router_fib4_add_info_destroy(), the fib_entry pointer is used
after it has been freed by mlxsw_sp_fib_entry_destroy(). Use a temporary
variable to fix this.

Fixes: 61c503f976b5449e ("mlxsw: spectrum_router: Implement fib4 add/del switchdev obj ops")
Signed-off-by: Vincent Stehlé <vincent.stehle@laposte.net>
Cc: Jiri Pirko <jiri@mellanox.com>
Acked-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 81418d6..90bb93b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1651,9 +1651,10 @@ static void mlxsw_sp_router_fib4_add_info_destroy(void const *data)
 	const struct mlxsw_sp_router_fib4_add_info *info = data;
 	struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry;
 	struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp;
+	struct mlxsw_sp_vr *vr = fib_entry->vr;
 
 	mlxsw_sp_fib_entry_destroy(fib_entry);
-	mlxsw_sp_vr_put(mlxsw_sp, fib_entry->vr);
+	mlxsw_sp_vr_put(mlxsw_sp, vr);
 	kfree(info);
 }
 
-- 
cgit v0.10.2


From 12311959ecf8a3a64676c01b62ce67a0c5f0fd49 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@oracle.com>
Date: Fri, 12 Aug 2016 20:10:44 +0200
Subject: rhashtable: fix shift by 64 when shrinking

I got this:

    ================================================================================
    UBSAN: Undefined behaviour in ./include/linux/log2.h:63:13
    shift exponent 64 is too large for 64-bit type 'long unsigned int'
    CPU: 1 PID: 721 Comm: kworker/1:1 Not tainted 4.8.0-rc1+ #87
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014
    Workqueue: events rht_deferred_worker
     0000000000000000 ffff88011661f8d8 ffffffff82344f50 0000000041b58ab3
     ffffffff84f98000 ffffffff82344ea4 ffff88011661f900 ffff88011661f8b0
     0000000000000001 ffff88011661f6b8 dffffc0000000000 ffffffff867f7640
    Call Trace:
     [<ffffffff82344f50>] dump_stack+0xac/0xfc
     [<ffffffff82344ea4>] ? _atomic_dec_and_lock+0xc4/0xc4
     [<ffffffff8242f5b8>] ubsan_epilogue+0xd/0x8a
     [<ffffffff82430c41>] __ubsan_handle_shift_out_of_bounds+0x255/0x29a
     [<ffffffff824309ec>] ? __ubsan_handle_out_of_bounds+0x180/0x180
     [<ffffffff84003436>] ? nl80211_req_set_reg+0x256/0x2f0
     [<ffffffff812112ba>] ? print_context_stack+0x8a/0x160
     [<ffffffff81200031>] ? amd_pmu_reset+0x341/0x380
     [<ffffffff823af808>] rht_deferred_worker+0x1618/0x1790
     [<ffffffff823af808>] ? rht_deferred_worker+0x1618/0x1790
     [<ffffffff823ae1f0>] ? rhashtable_jhash2+0x370/0x370
     [<ffffffff8134c12d>] ? process_one_work+0x6fd/0x1970
     [<ffffffff8134c1cf>] process_one_work+0x79f/0x1970
     [<ffffffff8134c12d>] ? process_one_work+0x6fd/0x1970
     [<ffffffff8134ba30>] ? try_to_grab_pending+0x4c0/0x4c0
     [<ffffffff8134d564>] ? worker_thread+0x1c4/0x1340
     [<ffffffff8134d8ff>] worker_thread+0x55f/0x1340
     [<ffffffff845e904f>] ? __schedule+0x4df/0x1d40
     [<ffffffff8134d3a0>] ? process_one_work+0x1970/0x1970
     [<ffffffff8134d3a0>] ? process_one_work+0x1970/0x1970
     [<ffffffff813642f7>] kthread+0x237/0x390
     [<ffffffff813640c0>] ? __kthread_parkme+0x280/0x280
     [<ffffffff845f8c93>] ? _raw_spin_unlock_irq+0x33/0x50
     [<ffffffff845f95df>] ret_from_fork+0x1f/0x40
     [<ffffffff813640c0>] ? __kthread_parkme+0x280/0x280
    ================================================================================

roundup_pow_of_two() is undefined when called with an argument of 0, so
let's avoid the call and just fall back to ht->p.min_size (which should
never be smaller than HASH_MIN_SIZE).

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 42acd81..5ba520b 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -324,12 +324,14 @@ static int rhashtable_expand(struct rhashtable *ht)
 static int rhashtable_shrink(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
-	unsigned int size;
+	unsigned int nelems = atomic_read(&ht->nelems);
+	unsigned int size = 0;
 	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
-	size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+	if (nelems)
+		size = roundup_pow_of_two(nelems * 3 / 2);
 	if (size < ht->p.min_size)
 		size = ht->p.min_size;
 
-- 
cgit v0.10.2


From 5e457896986e16c440c97bb94b9ccd95dd157292 Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Sat, 13 Aug 2016 01:13:38 +0900
Subject: net: ipv6: Fix ping to link-local addresses.

ping_v6_sendmsg does not set flowi6_oif in response to
sin6_scope_id or sk_bound_dev_if, so it is not possible to use
these APIs to ping an IPv6 address on a different interface.
Instead, it sets flowi6_iif, which is incorrect but harmless.

Stop setting flowi6_iif, and support various ways of setting oif
in the same priority order used by udpv6_sendmsg.

Tested: https://android-review.googlesource.com/#/c/254470/
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index fed40d1..0900352 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -55,7 +55,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	struct icmp6hdr user_icmph;
 	int addr_type;
 	struct in6_addr *daddr;
-	int iif = 0;
+	int oif = 0;
 	struct flowi6 fl6;
 	int err;
 	struct dst_entry *dst;
@@ -78,25 +78,30 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		if (u->sin6_family != AF_INET6) {
 			return -EAFNOSUPPORT;
 		}
-		if (sk->sk_bound_dev_if &&
-		    sk->sk_bound_dev_if != u->sin6_scope_id) {
-			return -EINVAL;
-		}
 		daddr = &(u->sin6_addr);
-		iif = u->sin6_scope_id;
+		if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr)))
+			oif = u->sin6_scope_id;
 	} else {
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
 		daddr = &sk->sk_v6_daddr;
 	}
 
-	if (!iif)
-		iif = sk->sk_bound_dev_if;
+	if (!oif)
+		oif = sk->sk_bound_dev_if;
+
+	if (!oif)
+		oif = np->sticky_pktinfo.ipi6_ifindex;
+
+	if (!oif && ipv6_addr_is_multicast(daddr))
+		oif = np->mcast_oif;
+	else if (!oif)
+		oif = np->ucast_oif;
 
 	addr_type = ipv6_addr_type(daddr);
-	if (__ipv6_addr_needs_scope_id(addr_type) && !iif)
-		return -EINVAL;
-	if (addr_type & IPV6_ADDR_MAPPED)
+	if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) ||
+	    (addr_type & IPV6_ADDR_MAPPED) ||
+	    (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if))
 		return -EINVAL;
 
 	/* TODO: use ip6_datagram_send_ctl to get options from cmsg */
@@ -106,16 +111,12 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	fl6.flowi6_proto = IPPROTO_ICMPV6;
 	fl6.saddr = np->saddr;
 	fl6.daddr = *daddr;
+	fl6.flowi6_oif = oif;
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.fl6_icmp_type = user_icmph.icmp6_type;
 	fl6.fl6_icmp_code = user_icmph.icmp6_code;
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
-		fl6.flowi6_oif = np->mcast_oif;
-	else if (!fl6.flowi6_oif)
-		fl6.flowi6_oif = np->ucast_oif;
-
 	ipc6.tclass = np->tclass;
 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 
-- 
cgit v0.10.2


From 3d7b33209201cbfa090d614db993571ca3c6b090 Mon Sep 17 00:00:00 2001
From: Simon Horman <simon.horman@netronome.com>
Date: Mon, 15 Aug 2016 13:06:24 +0200
Subject: gre: set inner_protocol on xmit

Ensure that the inner_protocol is set on transmit so that GSO segmentation,
which relies on that field, works correctly.

This is achieved by setting the inner_protocol in gre_build_header rather
than each caller of that function. It ensures that the inner_protocol is
set when gre_fb_xmit() is used to transmit GRE which was not previously the
case.

I have observed this is not the case when OvS transmits GRE using
lwtunnel metadata (which it always does).

Fixes: 38720352412a ("gre: Use inner_proto to obtain inner header protocol")
Cc: Pravin Shelar <pshelar@ovn.org>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/gre.h b/include/net/gre.h
index 7a54a31..73ea256 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -104,6 +104,7 @@ static inline void gre_build_header(struct sk_buff *skb, int hdr_len,
 
 	skb_push(skb, hdr_len);
 
+	skb_set_inner_protocol(skb, proto);
 	skb_reset_transport_header(skb);
 	greh = (struct gre_base_hdr *)skb->data;
 	greh->flags = gre_tnl_flags_to_gre_flags(flags);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 5b1481b..113cc43 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -370,7 +370,6 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
 			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
 			 htonl(tunnel->o_seqno));
 
-	skb_set_inner_protocol(skb, proto);
 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
 }
 
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 776d145..704274c 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -519,8 +519,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 	gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
 			 protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
 
-	skb_set_inner_protocol(skb, protocol);
-
 	return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
 			    NEXTHDR_GRE);
 }
-- 
cgit v0.10.2


From f9a7da9130ef0143eb900794c7863dc5c9051fbc Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 15 Aug 2016 17:48:39 +0200
Subject: hv_netvsc: don't lose VF information

struct netvsc_device is not suitable for storing VF information as this
structure is being destroyed on MTU change / set channel operation (see
rndis_filter_device_remove()). Move all VF related stuff to struct
net_device_context which is persistent.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Acked-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 467fb8b..3b3ecf2 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -647,7 +647,7 @@ struct netvsc_reconfig {
 struct garp_wrk {
 	struct work_struct dwrk;
 	struct net_device *netdev;
-	struct netvsc_device *netvsc_dev;
+	struct net_device_context *net_device_ctx;
 };
 
 /* The context of the netvsc device  */
@@ -678,6 +678,15 @@ struct net_device_context {
 
 	/* the device is going away */
 	bool start_remove;
+
+	/* State to manage the associated VF interface. */
+	struct net_device *vf_netdev;
+	bool vf_inject;
+	atomic_t vf_use_cnt;
+	/* 1: allocated, serial number is valid. 0: not allocated */
+	u32 vf_alloc;
+	/* Serial number of the VF to team with */
+	u32 vf_serial;
 };
 
 /* Per netvsc device */
@@ -733,15 +742,7 @@ struct netvsc_device {
 	u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
 	u32 pkt_align; /* alignment bytes, e.g. 8 */
 
-	/* 1: allocated, serial number is valid. 0: not allocated */
-	u32 vf_alloc;
-	/* Serial number of the VF to team with */
-	u32 vf_serial;
 	atomic_t open_cnt;
-	/* State to manage the associated VF interface. */
-	bool vf_inject;
-	struct net_device *vf_netdev;
-	atomic_t vf_use_cnt;
 };
 
 static inline struct netvsc_device *
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 20e0917..410fb8e8 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -77,13 +77,9 @@ static struct netvsc_device *alloc_net_device(void)
 	init_waitqueue_head(&net_device->wait_drain);
 	net_device->destroy = false;
 	atomic_set(&net_device->open_cnt, 0);
-	atomic_set(&net_device->vf_use_cnt, 0);
 	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
 	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
 
-	net_device->vf_netdev = NULL;
-	net_device->vf_inject = false;
-
 	return net_device;
 }
 
@@ -1106,16 +1102,16 @@ static void netvsc_send_table(struct hv_device *hdev,
 		nvscdev->send_table[i] = tab[i];
 }
 
-static void netvsc_send_vf(struct netvsc_device *nvdev,
+static void netvsc_send_vf(struct net_device_context *net_device_ctx,
 			   struct nvsp_message *nvmsg)
 {
-	nvdev->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
-	nvdev->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
+	net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
+	net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
 }
 
 static inline void netvsc_receive_inband(struct hv_device *hdev,
-					 struct netvsc_device *nvdev,
-					 struct nvsp_message *nvmsg)
+				 struct net_device_context *net_device_ctx,
+				 struct nvsp_message *nvmsg)
 {
 	switch (nvmsg->hdr.msg_type) {
 	case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
@@ -1123,7 +1119,7 @@ static inline void netvsc_receive_inband(struct hv_device *hdev,
 		break;
 
 	case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
-		netvsc_send_vf(nvdev, nvmsg);
+		netvsc_send_vf(net_device_ctx, nvmsg);
 		break;
 	}
 }
@@ -1136,6 +1132,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
 				   struct vmpacket_descriptor *desc)
 {
 	struct nvsp_message *nvmsg;
+	struct net_device_context *net_device_ctx = netdev_priv(ndev);
 
 	nvmsg = (struct nvsp_message *)((unsigned long)
 		desc + (desc->offset8 << 3));
@@ -1150,7 +1147,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
 		break;
 
 	case VM_PKT_DATA_INBAND:
-		netvsc_receive_inband(device, net_device, nvmsg);
+		netvsc_receive_inband(device, net_device_ctx, nvmsg);
 		break;
 
 	default:
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 41bd952..794139b 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -658,20 +658,19 @@ int netvsc_recv_callback(struct hv_device *device_obj,
 	struct sk_buff *skb;
 	struct sk_buff *vf_skb;
 	struct netvsc_stats *rx_stats;
-	struct netvsc_device *netvsc_dev = net_device_ctx->nvdev;
 	u32 bytes_recvd = packet->total_data_buflen;
 	int ret = 0;
 
 	if (!net || net->reg_state != NETREG_REGISTERED)
 		return NVSP_STAT_FAIL;
 
-	if (READ_ONCE(netvsc_dev->vf_inject)) {
-		atomic_inc(&netvsc_dev->vf_use_cnt);
-		if (!READ_ONCE(netvsc_dev->vf_inject)) {
+	if (READ_ONCE(net_device_ctx->vf_inject)) {
+		atomic_inc(&net_device_ctx->vf_use_cnt);
+		if (!READ_ONCE(net_device_ctx->vf_inject)) {
 			/*
 			 * We raced; just move on.
 			 */
-			atomic_dec(&netvsc_dev->vf_use_cnt);
+			atomic_dec(&net_device_ctx->vf_use_cnt);
 			goto vf_injection_done;
 		}
 
@@ -683,17 +682,19 @@ int netvsc_recv_callback(struct hv_device *device_obj,
 		 * the host). Deliver these via the VF interface
 		 * in the guest.
 		 */
-		vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet,
-					       csum_info, *data, vlan_tci);
+		vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev,
+					       packet, csum_info, *data,
+					       vlan_tci);
 		if (vf_skb != NULL) {
-			++netvsc_dev->vf_netdev->stats.rx_packets;
-			netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd;
+			++net_device_ctx->vf_netdev->stats.rx_packets;
+			net_device_ctx->vf_netdev->stats.rx_bytes +=
+				bytes_recvd;
 			netif_receive_skb(vf_skb);
 		} else {
 			++net->stats.rx_dropped;
 			ret = NVSP_STAT_FAIL;
 		}
-		atomic_dec(&netvsc_dev->vf_use_cnt);
+		atomic_dec(&net_device_ctx->vf_use_cnt);
 		return ret;
 	}
 
@@ -1158,7 +1159,7 @@ static void netvsc_notify_peers(struct work_struct *wrk)
 
 	netdev_notify_peers(gwrk->netdev);
 
-	atomic_dec(&gwrk->netvsc_dev->vf_use_cnt);
+	atomic_dec(&gwrk->net_device_ctx->vf_use_cnt);
 }
 
 static struct net_device *get_netvsc_net_device(char *mac)
@@ -1211,7 +1212,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 	 * Take a reference on the module.
 	 */
 	try_module_get(THIS_MODULE);
-	netvsc_dev->vf_netdev = vf_netdev;
+	net_device_ctx->vf_netdev = vf_netdev;
 	return NOTIFY_OK;
 }
 
@@ -1233,11 +1234,11 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
 
-	if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL))
+	if (!netvsc_dev || !net_device_ctx->vf_netdev)
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
-	netvsc_dev->vf_inject = true;
+	net_device_ctx->vf_inject = true;
 
 	/*
 	 * Open the device before switching data path.
@@ -1257,9 +1258,9 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
 	 * notify peers; take a reference to prevent
 	 * the VF interface from vanishing.
 	 */
-	atomic_inc(&netvsc_dev->vf_use_cnt);
+	atomic_inc(&net_device_ctx->vf_use_cnt);
 	net_device_ctx->gwrk.netdev = vf_netdev;
-	net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
+	net_device_ctx->gwrk.net_device_ctx = net_device_ctx;
 	schedule_work(&net_device_ctx->gwrk.dwrk);
 
 	return NOTIFY_OK;
@@ -1283,17 +1284,17 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
 
-	if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL))
+	if (!netvsc_dev || !net_device_ctx->vf_netdev)
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
-	netvsc_dev->vf_inject = false;
+	net_device_ctx->vf_inject = false;
 	/*
 	 * Wait for currently active users to
 	 * drain out.
 	 */
 
-	while (atomic_read(&netvsc_dev->vf_use_cnt) != 0)
+	while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
 		udelay(50);
 	netvsc_switch_datapath(ndev, false);
 	netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
@@ -1302,9 +1303,9 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
 	/*
 	 * Notify peers.
 	 */
-	atomic_inc(&netvsc_dev->vf_use_cnt);
+	atomic_inc(&net_device_ctx->vf_use_cnt);
 	net_device_ctx->gwrk.netdev = ndev;
-	net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
+	net_device_ctx->gwrk.net_device_ctx = net_device_ctx;
 	schedule_work(&net_device_ctx->gwrk.dwrk);
 
 	return NOTIFY_OK;
@@ -1331,7 +1332,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 		return NOTIFY_DONE;
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 
-	netvsc_dev->vf_netdev = NULL;
+	net_device_ctx->vf_netdev = NULL;
 	module_put(THIS_MODULE);
 	return NOTIFY_OK;
 }
@@ -1382,6 +1383,10 @@ static int netvsc_probe(struct hv_device *dev,
 	spin_lock_init(&net_device_ctx->lock);
 	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
 
+	atomic_set(&net_device_ctx->vf_use_cnt, 0);
+	net_device_ctx->vf_netdev = NULL;
+	net_device_ctx->vf_inject = false;
+
 	net->netdev_ops = &device_ops;
 
 	net->hw_features = NETVSC_HW_FEATURES;
-- 
cgit v0.10.2


From d072218f214929194db06069564495b6b9fff34a Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 15 Aug 2016 17:48:40 +0200
Subject: hv_netvsc: avoid deadlocks between rtnl lock and vf_use_cnt wait

Here is a deadlock scenario:
- netvsc_vf_up() schedules netvsc_notify_peers() work and quits.
- netvsc_vf_down() runs before netvsc_notify_peers() gets executed. As it
  is being executed from netdev notifier chain we hold rtnl lock when we
  get here.
- we enter while (atomic_read(&net_device_ctx->vf_use_cnt) != 0) loop and
  wait till netvsc_notify_peers() drops vf_use_cnt.
- netvsc_notify_peers() starts on some other CPU but netdev_notify_peers()
  will hang on rtnl_lock().
- deadlock!

Instead of introducing additional synchronization I suggest we drop
gwrk.dwrk completely and call NETDEV_NOTIFY_PEERS directly. As we're
acting under rtnl lock this is legitimate.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Acked-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 3b3ecf2..591af71 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -644,12 +644,6 @@ struct netvsc_reconfig {
 	u32 event;
 };
 
-struct garp_wrk {
-	struct work_struct dwrk;
-	struct net_device *netdev;
-	struct net_device_context *net_device_ctx;
-};
-
 /* The context of the netvsc device  */
 struct net_device_context {
 	/* point back to our device context */
@@ -667,7 +661,6 @@ struct net_device_context {
 
 	struct work_struct work;
 	u32 msg_enable; /* debug level */
-	struct garp_wrk gwrk;
 
 	struct netvsc_stats __percpu *tx_stats;
 	struct netvsc_stats __percpu *rx_stats;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 794139b..70317fa 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1151,17 +1151,6 @@ static void netvsc_free_netdev(struct net_device *netdev)
 	free_netdev(netdev);
 }
 
-static void netvsc_notify_peers(struct work_struct *wrk)
-{
-	struct garp_wrk *gwrk;
-
-	gwrk = container_of(wrk, struct garp_wrk, dwrk);
-
-	netdev_notify_peers(gwrk->netdev);
-
-	atomic_dec(&gwrk->net_device_ctx->vf_use_cnt);
-}
-
 static struct net_device *get_netvsc_net_device(char *mac)
 {
 	struct net_device *dev, *found = NULL;
@@ -1253,15 +1242,8 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
 
 	netif_carrier_off(ndev);
 
-	/*
-	 * Now notify peers. We are scheduling work to
-	 * notify peers; take a reference to prevent
-	 * the VF interface from vanishing.
-	 */
-	atomic_inc(&net_device_ctx->vf_use_cnt);
-	net_device_ctx->gwrk.netdev = vf_netdev;
-	net_device_ctx->gwrk.net_device_ctx = net_device_ctx;
-	schedule_work(&net_device_ctx->gwrk.dwrk);
+	/* Now notify peers through VF device. */
+	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev);
 
 	return NOTIFY_OK;
 }
@@ -1300,13 +1282,9 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
 	netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
 	rndis_filter_close(netvsc_dev);
 	netif_carrier_on(ndev);
-	/*
-	 * Notify peers.
-	 */
-	atomic_inc(&net_device_ctx->vf_use_cnt);
-	net_device_ctx->gwrk.netdev = ndev;
-	net_device_ctx->gwrk.net_device_ctx = net_device_ctx;
-	schedule_work(&net_device_ctx->gwrk.dwrk);
+
+	/* Now notify peers through netvsc device. */
+	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);
 
 	return NOTIFY_OK;
 }
@@ -1378,7 +1356,6 @@ static int netvsc_probe(struct hv_device *dev,
 
 	INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
 	INIT_WORK(&net_device_ctx->work, do_set_multicast);
-	INIT_WORK(&net_device_ctx->gwrk.dwrk, netvsc_notify_peers);
 
 	spin_lock_init(&net_device_ctx->lock);
 	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
-- 
cgit v0.10.2


From 57c1826b991244d2144eb6e3d5d1b13a53cbea63 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 15 Aug 2016 17:48:41 +0200
Subject: hv_netvsc: reset vf_inject on VF removal

We reset vf_inject on VF going down (netvsc_vf_down()) but we don't on
VF removal (netvsc_unregister_vf()) so vf_inject stays 'true' while
vf_netdev is already NULL and we're trying to inject packets into NULL
net device in netvsc_recv_callback() causing kernel to crash.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Acked-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 70317fa..2c90883 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1205,6 +1205,19 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 	return NOTIFY_OK;
 }
 
+static void netvsc_inject_enable(struct net_device_context *net_device_ctx)
+{
+	net_device_ctx->vf_inject = true;
+}
+
+static void netvsc_inject_disable(struct net_device_context *net_device_ctx)
+{
+	net_device_ctx->vf_inject = false;
+
+	/* Wait for currently active users to drain out. */
+	while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
+		udelay(50);
+}
 
 static int netvsc_vf_up(struct net_device *vf_netdev)
 {
@@ -1227,7 +1240,7 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
-	net_device_ctx->vf_inject = true;
+	netvsc_inject_enable(net_device_ctx);
 
 	/*
 	 * Open the device before switching data path.
@@ -1270,14 +1283,7 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
-	net_device_ctx->vf_inject = false;
-	/*
-	 * Wait for currently active users to
-	 * drain out.
-	 */
-
-	while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
-		udelay(50);
+	netvsc_inject_disable(net_device_ctx);
 	netvsc_switch_datapath(ndev, false);
 	netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
 	rndis_filter_close(netvsc_dev);
@@ -1309,7 +1315,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 	if (netvsc_dev == NULL)
 		return NOTIFY_DONE;
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
-
+	netvsc_inject_disable(net_device_ctx);
 	net_device_ctx->vf_netdev = NULL;
 	module_put(THIS_MODULE);
 	return NOTIFY_OK;
-- 
cgit v0.10.2


From 0f20d795f78d182c4b743d880a5e8dc4d39892fe Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 15 Aug 2016 17:48:42 +0200
Subject: hv_netvsc: protect module refcount by checking
 net_device_ctx->vf_netdev

We're not guaranteed to see NETDEV_REGISTER/NETDEV_UNREGISTER notifications
only once per VF but we increase/decrease module refcount unconditionally.
Check vf_netdev to make sure we don't take/release it twice. We presume
that only one VF per netvsc device may exist.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Acked-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 2c90883..62a4e6e 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1193,7 +1193,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
-	if (netvsc_dev == NULL)
+	if (!netvsc_dev || net_device_ctx->vf_netdev)
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
@@ -1312,7 +1312,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
-	if (netvsc_dev == NULL)
+	if (!netvsc_dev || !net_device_ctx->vf_netdev)
 		return NOTIFY_DONE;
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 	netvsc_inject_disable(net_device_ctx);
-- 
cgit v0.10.2


From 0dbff144a1e7310e2f8b7a957352c4be9aeb38e4 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 15 Aug 2016 17:48:43 +0200
Subject: hv_netvsc: fix bonding devices check in netvsc_netdev_event()

Bonding driver sets IFF_BONDING on both master (the bonding device) and
slave (the real NIC) devices and in netvsc_netdev_event() we want to skip
master devices only. Currently, there is an uncertainty when a slave
interface is removed: if bonding module comes first in netdev_chain it
clears IFF_BONDING flag on the netdev and netvsc_netdev_event() correctly
handles NETDEV_UNREGISTER event, but in case netvsc comes first on the
chain it sees the device with IFF_BONDING still attached and skips it. As
we still hold vf_netdev pointer to the device we crash on the next inject.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Acked-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 62a4e6e..3ba29fc 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1482,8 +1482,13 @@ static int netvsc_netdev_event(struct notifier_block *this,
 {
 	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
 
-	/* Avoid Vlan, Bonding dev with same MAC registering as VF */
-	if (event_dev->priv_flags & (IFF_802_1Q_VLAN | IFF_BONDING))
+	/* Avoid Vlan dev with same MAC registering as VF */
+	if (event_dev->priv_flags & IFF_802_1Q_VLAN)
+		return NOTIFY_DONE;
+
+	/* Avoid Bonding master dev with same MAC registering as VF */
+	if (event_dev->priv_flags & IFF_BONDING &&
+	    event_dev->flags & IFF_MASTER)
 		return NOTIFY_DONE;
 
 	switch (event) {
-- 
cgit v0.10.2


From d2fbdf76b85bcdfe57b8ef2ba09d20e8ada79abd Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@oracle.com>
Date: Sat, 23 Jul 2016 08:15:04 +0200
Subject: tipc: fix NULL pointer dereference in shutdown()

tipc_msg_create() can return a NULL skb and if so, we shouldn't try to
call tipc_node_xmit_skb() on it.

    general protection fault: 0000 [#1] PREEMPT SMP KASAN
    CPU: 3 PID: 30298 Comm: trinity-c0 Not tainted 4.7.0-rc7+ #19
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
    task: ffff8800baf09980 ti: ffff8800595b8000 task.ti: ffff8800595b8000
    RIP: 0010:[<ffffffff830bb46b>]  [<ffffffff830bb46b>] tipc_node_xmit_skb+0x6b/0x140
    RSP: 0018:ffff8800595bfce8  EFLAGS: 00010246
    RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000003023b0e0
    RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffffffff83d12580
    RBP: ffff8800595bfd78 R08: ffffed000b2b7f32 R09: 0000000000000000
    R10: fffffbfff0759725 R11: 0000000000000000 R12: 1ffff1000b2b7f9f
    R13: ffff8800595bfd58 R14: ffffffff83d12580 R15: dffffc0000000000
    FS:  00007fcdde242700(0000) GS:ffff88011af80000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: 00007fcddde1db10 CR3: 000000006874b000 CR4: 00000000000006e0
    DR0: 00007fcdde248000 DR1: 00007fcddd73d000 DR2: 00007fcdde248000
    DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000090602
    Stack:
     0000000000000018 0000000000000018 0000000041b58ab3 ffffffff83954208
     ffffffff830bb400 ffff8800595bfd30 ffffffff8309d767 0000000000000018
     0000000000000018 ffff8800595bfd78 ffffffff8309da1a 00000000810ee611
    Call Trace:
     [<ffffffff830c84a3>] tipc_shutdown+0x553/0x880
     [<ffffffff825b4a3b>] SyS_shutdown+0x14b/0x170
     [<ffffffff8100334c>] do_syscall_64+0x19c/0x410
     [<ffffffff83295ca5>] entry_SYSCALL64_slow_path+0x25/0x25
    Code: 90 00 b4 0b 83 c7 00 f1 f1 f1 f1 4c 8d 6d e0 c7 40 04 00 00 00 f4 c7 40 08 f3 f3 f3 f3 48 89 d8 48 c1 e8 03 c7 45 b4 00 00 00 00 <80> 3c 30 00 75 78 48 8d 7b 08 49 8d 75 c0 48 b8 00 00 00 00 00
    RIP  [<ffffffff830bb46b>] tipc_node_xmit_skb+0x6b/0x140
     RSP <ffff8800595bfce8>
    ---[ end trace 57b0484e351e71f1 ]---

I feel like we should maybe return -ENOMEM or -ENOBUFS, but I'm not sure
userspace is equipped to handle that. Anyway, this is better than a GPF
and looks somewhat consistent with other tipc_msg_create() callers.

Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index c49b8df..f9f5f3c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2180,7 +2180,8 @@ restart:
 					      TIPC_CONN_MSG, SHORT_H_SIZE,
 					      0, dnode, onode, dport, oport,
 					      TIPC_CONN_SHUTDOWN);
-			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
+			if (skb)
+				tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
 		}
 		tsk->connected = 0;
 		sock->state = SS_DISCONNECTING;
-- 
cgit v0.10.2


From 8ca7f4fe0733342c862b8585dd6eb6521b9bf533 Mon Sep 17 00:00:00 2001
From: "sean.wang@mediatek.com" <sean.wang@mediatek.com>
Date: Tue, 16 Aug 2016 13:55:13 +0800
Subject: net: ethernet: mediatek: fix RMII mode and add REVMII supported by
 GMAC

The patch fixes up the incorrect setup of reduced MII (RMII) on GMAC
and adds the supplement for the setup of reverse MII (REVMII) on GMAC
, and rearranges the error handling for invalid PHY argument.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 0030361..bd0ea05 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -245,12 +245,16 @@ static int mtk_phy_connect(struct mtk_mac *mac)
 	case PHY_INTERFACE_MODE_MII:
 		ge_mode = 1;
 		break;
-	case PHY_INTERFACE_MODE_RMII:
+	case PHY_INTERFACE_MODE_REVMII:
 		ge_mode = 2;
 		break;
+	case PHY_INTERFACE_MODE_RMII:
+		if (!mac->id)
+			goto err_phy;
+		ge_mode = 3;
+		break;
 	default:
-		dev_err(eth->dev, "invalid phy_mode\n");
-		return -1;
+		goto err_phy;
 	}
 
 	/* put the gmac into the right mode */
@@ -272,6 +276,11 @@ static int mtk_phy_connect(struct mtk_mac *mac)
 	of_node_put(np);
 
 	return 0;
+
+err_phy:
+	of_node_put(np);
+	dev_err(eth->dev, "invalid phy_mode\n");
+	return -EINVAL;
 }
 
 static int mtk_mdio_init(struct mtk_eth *eth)
-- 
cgit v0.10.2


From b2025c7cc92d5bfc8c5ce756c8d8a6f57c776fbd Mon Sep 17 00:00:00 2001
From: "sean.wang@mediatek.com" <sean.wang@mediatek.com>
Date: Tue, 16 Aug 2016 13:55:14 +0800
Subject: net: ethernet: mediatek: fix flow control settings on GMAC0 is not
 being enabled properly

Commit 08ef55c6f257acf3bdc6940813f80e8f0f5d90ec
("net-next: mediatek: fix gigabit and flow control advertisement")
had supported proper flow control settings for GMAC1. But for GMAC0,

1.GMAC0 shares the common logic with GMAC1 inside mtk_phy_link_adjust()
to adapt various settings for the target phy.

2.GMAC0 uses fixed-phy to connect to a builtin gigabit switch with
fixed link speed as commit 0c72c50f6f93b0c3daa9ea35d89ab3a933c7b5a0
("net-next: mediatek: add fixed-phy support") describes.

3.However, fixed-phy doesn't enable SUPPORTED_Pause & SUPPORTED_Asym_Pause
supported flag on default that would cause mtk_phy_link_adjust() not to
enable flow control setting on GMAC0 properly and cause packet dropped
when high traffic.

Due to these reasons, the patch adds SUPPORTED_Pause & SUPPORTED_Asym_Pause
supported flags on fixed-phy used by the driver to have proper handling on
the both GMAC with the shared common logic.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index bd0ea05..9901527 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -267,6 +267,11 @@ static int mtk_phy_connect(struct mtk_mac *mac)
 	mac->phy_dev->autoneg = AUTONEG_ENABLE;
 	mac->phy_dev->speed = 0;
 	mac->phy_dev->duplex = 0;
+
+	if (of_phy_is_fixed_link(mac->of_node))
+		mac->phy_dev->supported |=
+		SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+
 	mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause |
 				   SUPPORTED_Asym_Pause;
 	mac->phy_dev->advertising = mac->phy_dev->supported |
-- 
cgit v0.10.2


From 55a4e778191cfcf675aa1f9716edb71a3014d5fb Mon Sep 17 00:00:00 2001
From: "sean.wang@mediatek.com" <sean.wang@mediatek.com>
Date: Tue, 16 Aug 2016 13:55:15 +0800
Subject: net: ethernet: mediatek: fix runtime warning raised by inconsistent
 struct device pointers passed to DMA API

Runtime warning occurs if DMA-API debug feature is enabled that would be
raised by pointers passed to DMA API as arguments to inconsistent struct
device objects, so that the patch makes them usage aligned between DMA
operations such as dma_map_*() and dma_unmap_*() to eliminate the warning.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 9901527..f160954 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -558,15 +558,15 @@ static inline struct mtk_tx_buf *mtk_desc_to_tx_buf(struct mtk_tx_ring *ring,
 	return &ring->buf[idx];
 }
 
-static void mtk_tx_unmap(struct device *dev, struct mtk_tx_buf *tx_buf)
+static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf)
 {
 	if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
-		dma_unmap_single(dev,
+		dma_unmap_single(eth->dev,
 				 dma_unmap_addr(tx_buf, dma_addr0),
 				 dma_unmap_len(tx_buf, dma_len0),
 				 DMA_TO_DEVICE);
 	} else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) {
-		dma_unmap_page(dev,
+		dma_unmap_page(eth->dev,
 			       dma_unmap_addr(tx_buf, dma_addr0),
 			       dma_unmap_len(tx_buf, dma_len0),
 			       DMA_TO_DEVICE);
@@ -611,9 +611,9 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 	if (skb_vlan_tag_present(skb))
 		txd4 |= TX_DMA_INS_VLAN | skb_vlan_tag_get(skb);
 
-	mapped_addr = dma_map_single(&dev->dev, skb->data,
+	mapped_addr = dma_map_single(eth->dev, skb->data,
 				     skb_headlen(skb), DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
+	if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
 		return -ENOMEM;
 
 	WRITE_ONCE(itxd->txd1, mapped_addr);
@@ -639,10 +639,10 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 
 			n_desc++;
 			frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN);
-			mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset,
+			mapped_addr = skb_frag_dma_map(eth->dev, frag, offset,
 						       frag_map_size,
 						       DMA_TO_DEVICE);
-			if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
+			if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
 				goto err_dma;
 
 			if (i == nr_frags - 1 &&
@@ -695,7 +695,7 @@ err_dma:
 		tx_buf = mtk_desc_to_tx_buf(ring, itxd);
 
 		/* unmap dma */
-		mtk_tx_unmap(&dev->dev, tx_buf);
+		mtk_tx_unmap(eth, tx_buf);
 
 		itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
 		itxd = mtk_qdma_phys_to_virt(ring, itxd->txd2);
@@ -852,11 +852,11 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 			netdev->stats.rx_dropped++;
 			goto release_desc;
 		}
-		dma_addr = dma_map_single(&eth->netdev[mac]->dev,
+		dma_addr = dma_map_single(eth->dev,
 					  new_data + NET_SKB_PAD,
 					  ring->buf_size,
 					  DMA_FROM_DEVICE);
-		if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) {
+		if (unlikely(dma_mapping_error(eth->dev, dma_addr))) {
 			skb_free_frag(new_data);
 			netdev->stats.rx_dropped++;
 			goto release_desc;
@@ -871,7 +871,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		}
 		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
 
-		dma_unmap_single(&netdev->dev, trxd.rxd1,
+		dma_unmap_single(eth->dev, trxd.rxd1,
 				 ring->buf_size, DMA_FROM_DEVICE);
 		pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
 		skb->dev = netdev;
@@ -953,7 +953,7 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
 			done[mac]++;
 			budget--;
 		}
-		mtk_tx_unmap(eth->dev, tx_buf);
+		mtk_tx_unmap(eth, tx_buf);
 
 		ring->last_free = desc;
 		atomic_inc(&ring->free_count);
@@ -1108,7 +1108,7 @@ static void mtk_tx_clean(struct mtk_eth *eth)
 
 	if (ring->buf) {
 		for (i = 0; i < MTK_DMA_SIZE; i++)
-			mtk_tx_unmap(eth->dev, &ring->buf[i]);
+			mtk_tx_unmap(eth, &ring->buf[i]);
 		kfree(ring->buf);
 		ring->buf = NULL;
 	}
-- 
cgit v0.10.2


From 0066c8b6f4050d7c57f6379d6fd4535e2f267f17 Mon Sep 17 00:00:00 2001
From: Kshitiz Gupta <kshitiz.gupta@ni.com>
Date: Sat, 16 Jul 2016 02:23:45 -0500
Subject: igb: fix adjusting PTP timestamps for Tx/Rx latency

Fix PHY delay compensation math in igb_ptp_tx_hwtstamp() and
igb_ptp_rx_rgtstamp. Add PHY delay compensation in
igb_ptp_rx_pktstamp().

In the IGB driver, there are two functions that retrieve timestamps
received by the PHY - igb_ptp_rx_rgtstamp() and igb_ptp_rx_pktstamp().
The previous commit only changed igb_ptp_rx_rgtstamp(), and the change
was incorrect.

There are two instances in which PHY delay compensations should be
made:

- Before the packet transmission over the PHY, the latency between
  when the packet is timestamped and transmission of the packets,
  should be an add operation, but it is currently a subtract.

- After the packets are received from the PHY, the latency between
  the receiving and timestamping of the packets should be a subtract
  operation, but it is currently an add.

Signed-off-by: Kshitiz Gupta <kshitiz.gupta@ni.com>
Fixes: 3f544d2 (igb: adjust ptp timestamps for tx/rx latency)
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index e61b647..336c103 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -744,7 +744,8 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
 		}
 	}
 
-	shhwtstamps.hwtstamp = ktime_sub_ns(shhwtstamps.hwtstamp, adjust);
+	shhwtstamps.hwtstamp =
+		ktime_add_ns(shhwtstamps.hwtstamp, adjust);
 
 	skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps);
 	dev_kfree_skb_any(adapter->ptp_tx_skb);
@@ -767,13 +768,32 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector,
 			 struct sk_buff *skb)
 {
 	__le64 *regval = (__le64 *)va;
+	struct igb_adapter *adapter = q_vector->adapter;
+	int adjust = 0;
 
 	/* The timestamp is recorded in little endian format.
 	 * DWORD: 0        1        2        3
 	 * Field: Reserved Reserved SYSTIML  SYSTIMH
 	 */
-	igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb),
+	igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb),
 				   le64_to_cpu(regval[1]));
+
+	/* adjust timestamp for the RX latency based on link speed */
+	if (adapter->hw.mac.type == e1000_i210) {
+		switch (adapter->link_speed) {
+		case SPEED_10:
+			adjust = IGB_I210_RX_LATENCY_10;
+			break;
+		case SPEED_100:
+			adjust = IGB_I210_RX_LATENCY_100;
+			break;
+		case SPEED_1000:
+			adjust = IGB_I210_RX_LATENCY_1000;
+			break;
+		}
+	}
+	skb_hwtstamps(skb)->hwtstamp =
+		ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
 }
 
 /**
@@ -825,7 +845,7 @@ void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector,
 		}
 	}
 	skb_hwtstamps(skb)->hwtstamp =
-		ktime_add_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
+		ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
 
 	/* Update the last_rx_timestamp timer in order to enable watchdog check
 	 * for error case of latched timestamp on a dropped packet.
-- 
cgit v0.10.2


From 0be5b96cd8400aeb4bf3f8c5e7f5efaa38ae5055 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Tue, 26 Jul 2016 14:25:34 -0400
Subject: e1000e: factor out systim sanitization

This is prepatory work for an expanding list of adapter families that have
occasional ~10 hour clock jumps when being used for PTP. Factor out the
sanitization function and convert to using a feature (bug) flag, per
suggestion from Jesse Brandeburg.

Littering functional code with device-specific checks is much messier than
simply checking a flag, and having device-specific init set flags as needed.
There are probably a number of other cases in the e1000e code that
could/should be converted similarly.

Suggested-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jarod Wilson <jarod@redhat.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
index 7fd4d54..6b03c85 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -2032,7 +2032,8 @@ const struct e1000_info e1000_82574_info = {
 				  | FLAG2_DISABLE_ASPM_L0S
 				  | FLAG2_DISABLE_ASPM_L1
 				  | FLAG2_NO_DISABLE_RX
-				  | FLAG2_DMA_BURST,
+				  | FLAG2_DMA_BURST
+				  | FLAG2_CHECK_SYSTIM_OVERFLOW,
 	.pba			= 32,
 	.max_hw_frame_size	= DEFAULT_JUMBO,
 	.get_variants		= e1000_get_variants_82571,
@@ -2053,7 +2054,8 @@ const struct e1000_info e1000_82583_info = {
 				  | FLAG_HAS_CTRLEXT_ON_LOAD,
 	.flags2			= FLAG2_DISABLE_ASPM_L0S
 				  | FLAG2_DISABLE_ASPM_L1
-				  | FLAG2_NO_DISABLE_RX,
+				  | FLAG2_NO_DISABLE_RX
+				  | FLAG2_CHECK_SYSTIM_OVERFLOW,
 	.pba			= 32,
 	.max_hw_frame_size	= DEFAULT_JUMBO,
 	.get_variants		= e1000_get_variants_82571,
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index ef96cd1..879cca4 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -452,6 +452,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca);
 #define FLAG2_PCIM2PCI_ARBITER_WA         BIT(11)
 #define FLAG2_DFLT_CRC_STRIPPING          BIT(12)
 #define FLAG2_CHECK_RX_HWTSTAMP           BIT(13)
+#define FLAG2_CHECK_SYSTIM_OVERFLOW       BIT(14)
 
 #define E1000_RX_DESC_PS(R, i)	    \
 	(&(((union e1000_rx_desc_packet_split *)((R).desc))[i]))
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 02f4439..7017281 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4303,6 +4303,42 @@ void e1000e_reinit_locked(struct e1000_adapter *adapter)
 }
 
 /**
+ * e1000e_sanitize_systim - sanitize raw cycle counter reads
+ * @hw: pointer to the HW structure
+ * @systim: cycle_t value read, sanitized and returned
+ *
+ * Errata for 82574/82583 possible bad bits read from SYSTIMH/L:
+ * check to see that the time is incrementing at a reasonable
+ * rate and is a multiple of incvalue.
+ **/
+static cycle_t e1000e_sanitize_systim(struct e1000_hw *hw, cycle_t systim)
+{
+	u64 time_delta, rem, temp;
+	cycle_t systim_next;
+	u32 incvalue;
+	int i;
+
+	incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
+	for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
+		/* latch SYSTIMH on read of SYSTIML */
+		systim_next = (cycle_t)er32(SYSTIML);
+		systim_next |= (cycle_t)er32(SYSTIMH) << 32;
+
+		time_delta = systim_next - systim;
+		temp = time_delta;
+		/* VMWare users have seen incvalue of zero, don't div / 0 */
+		rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0);
+
+		systim = systim_next;
+
+		if ((time_delta < E1000_82574_SYSTIM_EPSILON) && (rem == 0))
+			break;
+	}
+
+	return systim;
+}
+
+/**
  * e1000e_cyclecounter_read - read raw cycle counter (used by time counter)
  * @cc: cyclecounter structure
  **/
@@ -4312,7 +4348,7 @@ static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc)
 						     cc);
 	struct e1000_hw *hw = &adapter->hw;
 	u32 systimel, systimeh;
-	cycle_t systim, systim_next;
+	cycle_t systim;
 	/* SYSTIMH latching upon SYSTIML read does not work well.
 	 * This means that if SYSTIML overflows after we read it but before
 	 * we read SYSTIMH, the value of SYSTIMH has been incremented and we
@@ -4335,33 +4371,9 @@ static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc)
 	systim = (cycle_t)systimel;
 	systim |= (cycle_t)systimeh << 32;
 
-	if ((hw->mac.type == e1000_82574) || (hw->mac.type == e1000_82583)) {
-		u64 time_delta, rem, temp;
-		u32 incvalue;
-		int i;
-
-		/* errata for 82574/82583 possible bad bits read from SYSTIMH/L
-		 * check to see that the time is incrementing at a reasonable
-		 * rate and is a multiple of incvalue
-		 */
-		incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
-		for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
-			/* latch SYSTIMH on read of SYSTIML */
-			systim_next = (cycle_t)er32(SYSTIML);
-			systim_next |= (cycle_t)er32(SYSTIMH) << 32;
-
-			time_delta = systim_next - systim;
-			temp = time_delta;
-			/* VMWare users have seen incvalue of zero, don't div / 0 */
-			rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0);
-
-			systim = systim_next;
+	if (adapter->flags2 & FLAG2_CHECK_SYSTIM_OVERFLOW)
+		systim = e1000e_sanitize_systim(hw, systim);
 
-			if ((time_delta < E1000_82574_SYSTIM_EPSILON) &&
-			    (rem == 0))
-				break;
-		}
-	}
 	return systim;
 }
 
-- 
cgit v0.10.2


From 8037dd60f45264c3fbbea4cc0cea5f2f0a774b5e Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Tue, 26 Jul 2016 14:25:35 -0400
Subject: e1000e: fix PTP on e1000_pch_lpt variants

I've got reports that the Intel I-218V NIC in Intel NUC5i5RYH systems used
as a PTP slave experiences random ~10 hour clock jumps, which are resolved
if the same workaround for the 82574 and 82583 is employed, so set the
appropriate flag2 in e1000_pch_lpt_info too.

Reported-by: Rupesh Patel <rupatel@redhat.com>
Signed-off-by: Jarod Wilson <jarod@redhat.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 3e11322..f3aaca7 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -5885,7 +5885,8 @@ const struct e1000_info e1000_pch_lpt_info = {
 				  | FLAG_HAS_JUMBO_FRAMES
 				  | FLAG_APME_IN_WUC,
 	.flags2			= FLAG2_HAS_PHY_STATS
-				  | FLAG2_HAS_EEE,
+				  | FLAG2_HAS_EEE
+				  | FLAG2_CHECK_SYSTIM_OVERFLOW,
 	.pba			= 26,
 	.max_hw_frame_size	= 9022,
 	.get_variants		= e1000_get_variants_ich8lan,
-- 
cgit v0.10.2


From f60439bc21e3337429838e477903214f5bd8277f Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 11 Aug 2016 14:51:56 -0700
Subject: ixgbe: Force VLNCTRL.VFE to be set in all VMDq paths

When I was adding the code for enabling VLAN promiscuous mode with SR-IOV
enabled I had inadvertently left the VLNCTRL.VFE bit unchanged as I has
assumed there was code in another path that was setting it when we enabled
SR-IOV.  This wasn't the case and as a result we were just disabling VLAN
filtering for all the VFs apparently.

Also the previous patches were always clearing CFIEN which was always set
to 0 by the hardware anyway so I am dropping the redundant bit clearing.

Fixes: 16369564915a ("ixgbe: Add support for VLAN promiscuous with SR-IOV")
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 5418c69a..e0fdef8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -4100,6 +4100,8 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 vlnctrl, i;
 
+	vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+
 	switch (hw->mac.type) {
 	case ixgbe_mac_82599EB:
 	case ixgbe_mac_X540:
@@ -4112,8 +4114,7 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
 		/* fall through */
 	case ixgbe_mac_82598EB:
 		/* legacy case, we can just disable VLAN filtering */
-		vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
-		vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN);
+		vlnctrl &= ~IXGBE_VLNCTRL_VFE;
 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
 		return;
 	}
@@ -4125,6 +4126,10 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
 	/* Set flag so we don't redo unnecessary work */
 	adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC;
 
+	/* For VMDq and SR-IOV we must leave VLAN filtering enabled */
+	vlnctrl |= IXGBE_VLNCTRL_VFE;
+	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+
 	/* Add PF to all active pools */
 	for (i = IXGBE_VLVF_ENTRIES; --i;) {
 		u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32);
@@ -4191,6 +4196,11 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 vlnctrl, i;
 
+	/* Set VLAN filtering to enabled */
+	vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+	vlnctrl |= IXGBE_VLNCTRL_VFE;
+	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+
 	switch (hw->mac.type) {
 	case ixgbe_mac_82599EB:
 	case ixgbe_mac_X540:
@@ -4202,10 +4212,6 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter)
 			break;
 		/* fall through */
 	case ixgbe_mac_82598EB:
-		vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
-		vlnctrl &= ~IXGBE_VLNCTRL_CFIEN;
-		vlnctrl |= IXGBE_VLNCTRL_VFE;
-		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
 		return;
 	}
 
-- 
cgit v0.10.2


From 3d951822be216d8c6fcfc8abf75e5ed307eeb646 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 12 Aug 2016 09:53:39 -0700
Subject: ixgbe: Re-enable ability to toggle VLAN filtering

Back when I submitted the GSO code I messed up and dropped the support for
disabling the VLAN tag filtering via the feature bit.  This patch
re-enables the use of the NETIF_F_HW_VLAN_CTAG_FILTER to enable/disable the
VLAN filtering independent of toggling promiscuous mode.

Fixes: b83e30104b ("ixgbe/ixgbevf: Add support for GSO partial")
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index e0fdef8..ee57a89 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9523,6 +9523,7 @@ skip_sriov:
 
 	/* copy netdev features into list of user selectable features */
 	netdev->hw_features |= netdev->features |
+			       NETIF_F_HW_VLAN_CTAG_FILTER |
 			       NETIF_F_HW_VLAN_CTAG_RX |
 			       NETIF_F_HW_VLAN_CTAG_TX |
 			       NETIF_F_RXALL |
-- 
cgit v0.10.2


From fbfe12c64f9650aa22f434dd9dd22df7ddf63221 Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Fri, 12 Aug 2016 09:56:32 -0700
Subject: i40e: check for and deal with non-contiguous TCs

The i40e driver was causing a kernel panic when
non-contiguous Traffic Classes, or Traffic Classes not
starting with TC0, were configured on a link partner switch.
i40e does not support non-contiguous TCs.

To fix this, the patch changes the logic when determining
the total number of TCs enabled.  Before, this would use the
highest TC number enabled and assume that all TCs below it were
also enabled.  Now, we create a bitmask of enabled TCs and scan
it to determine not only the number of TCs, but also if the set
of enabled TCs starts at zero and is contiguous.  If not, then
DCB is disabled by only returning one TC.

Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 81c99e1..c6ac7a6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4554,23 +4554,38 @@ static u8 i40e_get_iscsi_tc_map(struct i40e_pf *pf)
  **/
 static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg)
 {
+	int i, tc_unused = 0;
 	u8 num_tc = 0;
-	int i;
+	u8 ret = 0;
 
 	/* Scan the ETS Config Priority Table to find
 	 * traffic class enabled for a given priority
-	 * and use the traffic class index to get the
-	 * number of traffic classes enabled
+	 * and create a bitmask of enabled TCs
 	 */
-	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
-		if (dcbcfg->etscfg.prioritytable[i] > num_tc)
-			num_tc = dcbcfg->etscfg.prioritytable[i];
-	}
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
+		num_tc |= BIT(dcbcfg->etscfg.prioritytable[i]);
 
-	/* Traffic class index starts from zero so
-	 * increment to return the actual count
+	/* Now scan the bitmask to check for
+	 * contiguous TCs starting with TC0
 	 */
-	return num_tc + 1;
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (num_tc & BIT(i)) {
+			if (!tc_unused) {
+				ret++;
+			} else {
+				pr_err("Non-contiguous TC - Disabling DCB\n");
+				return 1;
+			}
+		} else {
+			tc_unused = 1;
+		}
+	}
+
+	/* There is always at least TC0 */
+	if (!ret)
+		ret = 1;
+
+	return ret;
 }
 
 /**
-- 
cgit v0.10.2


From 7a35583ec5b64f17559c9de8d7c47f7360e40362 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 17 Aug 2016 16:39:28 +0200
Subject: mlxsw: spectrum: Don't return upon error in removal path

When removing a VLAN filter from the device we shouldn't return upon the
first error we encounter, as otherwise we'll have resources that will
never be freed nor used.

Instead, we should keep trying to free as much resources as possible in
a best effort mode.

Remove the error message as well, since we already get these from the
EMAD transaction code.

Fixes: 99724c18fc66 ("mlxsw: spectrum: Introduce support for router interfaces")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index e1b8f62..76b53ed 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1010,7 +1010,6 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev,
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	struct mlxsw_sp_port *mlxsw_sp_vport;
 	struct mlxsw_sp_fid *f;
-	int err;
 
 	/* VLAN 0 is removed from HW filter when device goes down, but
 	 * it is reserved in our case, so simply return.
@@ -1019,23 +1018,12 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev,
 		return 0;
 
 	mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
-	if (!mlxsw_sp_vport) {
-		netdev_warn(dev, "VID=%d does not exist\n", vid);
+	if (WARN_ON(!mlxsw_sp_vport))
 		return 0;
-	}
 
-	err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
-	if (err) {
-		netdev_err(dev, "Failed to set VLAN membership for VID=%d\n",
-			   vid);
-		return err;
-	}
+	mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
 
-	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
-	if (err) {
-		netdev_err(dev, "Failed to enable learning for VID=%d\n", vid);
-		return err;
-	}
+	mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
 
 	/* Drop FID reference. If this was the last reference the
 	 * resources will be freed.
@@ -1048,13 +1036,8 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev,
 	 * transition all active 802.1Q bridge VLANs to use VID to FID
 	 * mappings and set port's mode to VLAN mode.
 	 */
-	if (list_is_singular(&mlxsw_sp_port->vports_list)) {
-		err = mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
-		if (err) {
-			netdev_err(dev, "Failed to set to VLAN mode\n");
-			return err;
-		}
-	}
+	if (list_is_singular(&mlxsw_sp_port->vports_list))
+		mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
 
 	mlxsw_sp_port_vport_destroy(mlxsw_sp_vport);
 
-- 
cgit v0.10.2


From fa66d7e3fea7504e241e9004998af2c71814da18 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 17 Aug 2016 16:39:29 +0200
Subject: mlxsw: spectrum: Remove redundant errors from the code

Currently, when device configuration fails we emit errors to the kernel
log despite the fact we already get these from the EMAD transaction
layer, so remove them.

In addition to being unnecessary, removing these error messages will
allow us to reuse mlxsw_sp_port_add_vid() to create the PVID vPort
before registering the netdevice.

Fixes: 99724c18fc66 ("mlxsw: spectrum: Introduce support for router interfaces")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 76b53ed..a9281afc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -956,16 +956,12 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
 	if (!vid)
 		return 0;
 
-	if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) {
-		netdev_warn(dev, "VID=%d already configured\n", vid);
+	if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid))
 		return 0;
-	}
 
 	mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vid);
-	if (!mlxsw_sp_vport) {
-		netdev_err(dev, "Failed to create vPort for VID=%d\n", vid);
+	if (!mlxsw_sp_vport)
 		return -ENOMEM;
-	}
 
 	/* When adding the first VLAN interface on a bridged port we need to
 	 * transition all the active 802.1Q bridge VLANs to use explicit
@@ -973,24 +969,17 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
 	 */
 	if (list_is_singular(&mlxsw_sp_port->vports_list)) {
 		err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port);
-		if (err) {
-			netdev_err(dev, "Failed to set to Virtual mode\n");
+		if (err)
 			goto err_port_vp_mode_trans;
-		}
 	}
 
 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false);
-	if (err) {
-		netdev_err(dev, "Failed to disable learning for VID=%d\n", vid);
+	if (err)
 		goto err_port_vid_learning_set;
-	}
 
 	err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged);
-	if (err) {
-		netdev_err(dev, "Failed to set VLAN membership for VID=%d\n",
-			   vid);
+	if (err)
 		goto err_port_add_vid;
-	}
 
 	return 0;
 
-- 
cgit v0.10.2


From 05978481e77e47b0bcb1767d3783fa0e5a18f399 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 17 Aug 2016 16:39:30 +0200
Subject: mlxsw: spectrum: Create PVID vPort before registering netdevice

After registering a netdevice it's possible for user space applications
to configure an IP address on it. From the driver's perspective, this
means a router interface (RIF) should be created for the PVID vPort.

Therefore, we must create the PVID vPort before registering the
netdevice.

Fixes: 99724c18fc66 ("mlxsw: spectrum: Introduce support for router interfaces")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index a9281afc..0677f3f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -942,8 +942,8 @@ static void mlxsw_sp_port_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_vport)
 	kfree(mlxsw_sp_vport);
 }
 
-int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
-			  u16 vid)
+static int mlxsw_sp_port_add_vid(struct net_device *dev,
+				 __be16 __always_unused proto, u16 vid)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	struct mlxsw_sp_port *mlxsw_sp_vport;
@@ -2048,6 +2048,18 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
 	return 0;
 }
 
+static int mlxsw_sp_port_pvid_vport_create(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	mlxsw_sp_port->pvid = 1;
+
+	return mlxsw_sp_port_add_vid(mlxsw_sp_port->dev, 0, 1);
+}
+
+static int mlxsw_sp_port_pvid_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	return mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1);
+}
+
 static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 				bool split, u8 module, u8 width, u8 lane)
 {
@@ -2163,6 +2175,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 		goto err_port_dcb_init;
 	}
 
+	err = mlxsw_sp_port_pvid_vport_create(mlxsw_sp_port);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create PVID vPort\n",
+			mlxsw_sp_port->local_port);
+		goto err_port_pvid_vport_create;
+	}
+
 	mlxsw_sp_port_switchdev_init(mlxsw_sp_port);
 	err = register_netdev(dev);
 	if (err) {
@@ -2180,18 +2199,14 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 		goto err_core_port_init;
 	}
 
-	err = mlxsw_sp_port_vlan_init(mlxsw_sp_port);
-	if (err)
-		goto err_port_vlan_init;
-
 	mlxsw_sp->ports[local_port] = mlxsw_sp_port;
 	return 0;
 
-err_port_vlan_init:
-	mlxsw_core_port_fini(&mlxsw_sp_port->core_port);
 err_core_port_init:
 	unregister_netdev(dev);
 err_register_netdev:
+	mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port);
+err_port_pvid_vport_create:
 	mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
 err_port_dcb_init:
 err_port_ets_init:
@@ -2221,8 +2236,8 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 	mlxsw_sp->ports[local_port] = NULL;
 	mlxsw_core_port_fini(&mlxsw_sp_port->core_port);
 	unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
+	mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port);
 	mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
-	mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1);
 	mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
 	mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
 	mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index f69aa37..ab3feb8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -536,8 +536,6 @@ int mlxsw_sp_port_vid_to_fid_set(struct mlxsw_sp_port *mlxsw_sp_port,
 				 u16 vid);
 int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin,
 			   u16 vid_end, bool is_member, bool untagged);
-int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
-			  u16 vid);
 int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid,
 			     bool set);
 void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index a1ad5e6..b5e864d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -997,13 +997,13 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev,
 }
 
 static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
-				     u16 vid_begin, u16 vid_end, bool init)
+				     u16 vid_begin, u16 vid_end)
 {
 	struct net_device *dev = mlxsw_sp_port->dev;
 	u16 vid, pvid;
 	int err;
 
-	if (!init && !mlxsw_sp_port->bridged)
+	if (!mlxsw_sp_port->bridged)
 		return -EINVAL;
 
 	err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end,
@@ -1014,9 +1014,6 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
 		return err;
 	}
 
-	if (init)
-		goto out;
-
 	pvid = mlxsw_sp_port->pvid;
 	if (pvid >= vid_begin && pvid <= vid_end) {
 		err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
@@ -1028,7 +1025,6 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
 
 	mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end);
 
-out:
 	/* Changing activity bits only if HW operation succeded */
 	for (vid = vid_begin; vid <= vid_end; vid++)
 		clear_bit(vid, mlxsw_sp_port->active_vlans);
@@ -1039,8 +1035,8 @@ out:
 static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
 				   const struct switchdev_obj_port_vlan *vlan)
 {
-	return __mlxsw_sp_port_vlans_del(mlxsw_sp_port,
-					 vlan->vid_begin, vlan->vid_end, false);
+	return __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vlan->vid_begin,
+					 vlan->vid_end);
 }
 
 void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port)
@@ -1048,7 +1044,7 @@ void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port)
 	u16 vid;
 
 	for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID)
-		__mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid, false);
+		__mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid);
 }
 
 static int
@@ -1546,32 +1542,6 @@ void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp)
 	mlxsw_sp_fdb_fini(mlxsw_sp);
 }
 
-int mlxsw_sp_port_vlan_init(struct mlxsw_sp_port *mlxsw_sp_port)
-{
-	struct net_device *dev = mlxsw_sp_port->dev;
-	int err;
-
-	/* Allow only untagged packets to ingress and tag them internally
-	 * with VID 1.
-	 */
-	mlxsw_sp_port->pvid = 1;
-	err = __mlxsw_sp_port_vlans_del(mlxsw_sp_port, 0, VLAN_N_VID - 1,
-					true);
-	if (err) {
-		netdev_err(dev, "Unable to init VLANs\n");
-		return err;
-	}
-
-	/* Add implicit VLAN interface in the device, so that untagged
-	 * packets will be classified to the default vFID.
-	 */
-	err = mlxsw_sp_port_add_vid(dev, 0, 1);
-	if (err)
-		netdev_err(dev, "Failed to configure default vFID\n");
-
-	return err;
-}
-
 void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port)
 {
 	mlxsw_sp_port->dev->switchdev_ops = &mlxsw_sp_port_switchdev_ops;
-- 
cgit v0.10.2


From 2f25844c233650b2abb92b66b3d0af7d73b5f88f Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 17 Aug 2016 16:39:31 +0200
Subject: mlxsw: spectrum: Mark port as active before registering it

Commit bbf2a4757b30 ("mlxsw: spectrum: Initialize ports at the end of
init sequence") moved ports initialization to the end of the init
sequence, which means ports are the first to be removed during fini.

Since the FDB delayed work is still active when ports are removed it's
possible for it to process FDB notifications of inactive ports,
resulting in a warning message.

Fix that by marking ports as inactive only after unregistering them. The
NETDEV_UNREGISTER event will invoke bridge's driver port removal
sequence that will cause the FDB (and FDB notifications) to be flushed.

Fixes: bbf2a4757b30 ("mlxsw: spectrum: Initialize ports at the end of init sequence")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 0677f3f..12681db 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2183,6 +2183,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 	}
 
 	mlxsw_sp_port_switchdev_init(mlxsw_sp_port);
+	mlxsw_sp->ports[local_port] = mlxsw_sp_port;
 	err = register_netdev(dev);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register netdev\n",
@@ -2199,12 +2200,12 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 		goto err_core_port_init;
 	}
 
-	mlxsw_sp->ports[local_port] = mlxsw_sp_port;
 	return 0;
 
 err_core_port_init:
 	unregister_netdev(dev);
 err_register_netdev:
+	mlxsw_sp->ports[local_port] = NULL;
 	mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port);
 err_port_pvid_vport_create:
 	mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
@@ -2233,9 +2234,9 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 
 	if (!mlxsw_sp_port)
 		return;
-	mlxsw_sp->ports[local_port] = NULL;
 	mlxsw_core_port_fini(&mlxsw_sp_port->core_port);
 	unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
+	mlxsw_sp->ports[local_port] = NULL;
 	mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port);
 	mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
 	mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
-- 
cgit v0.10.2


From c20b80187a93b4fcc1c5c46fc8a436df1f17636d Mon Sep 17 00:00:00 2001
From: Elad Raz <eladr@mellanox.com>
Date: Wed, 17 Aug 2016 16:39:32 +0200
Subject: mlxsw: spectrum: Add missing packet traps

Add the following traps:

1) MTU Error: Trap packets whose size is bigger than the egress RIF's
MTU. If DF bit isn't set, traffic will continue to be routed in slow
path.

2) TTL Error: Trap packets whose TTL expired. This allows traceroute to
work properly.

3) OSPF packets.

Fixes: 7b27ce7bb9cd ("mlxsw: spectrum: Add traps needed for router implementation")
Signed-off-by: Elad Raz <eladr@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 12681db..6b69c8a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2651,6 +2651,21 @@ static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = {
 	{
 		.func = mlxsw_sp_rx_listener_func,
 		.local_port = MLXSW_PORT_DONT_CARE,
+		.trap_id = MLXSW_TRAP_ID_MTUERROR,
+	},
+	{
+		.func = mlxsw_sp_rx_listener_func,
+		.local_port = MLXSW_PORT_DONT_CARE,
+		.trap_id = MLXSW_TRAP_ID_TTLERROR,
+	},
+	{
+		.func = mlxsw_sp_rx_listener_func,
+		.local_port = MLXSW_PORT_DONT_CARE,
+		.trap_id = MLXSW_TRAP_ID_OSPF,
+	},
+	{
+		.func = mlxsw_sp_rx_listener_func,
+		.local_port = MLXSW_PORT_DONT_CARE,
 		.trap_id = MLXSW_TRAP_ID_IP2ME,
 	},
 	{
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 470d769..9508e0a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -56,6 +56,9 @@ enum {
 	MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34,
 	MLXSW_TRAP_ID_ARPBC = 0x50,
 	MLXSW_TRAP_ID_ARPUC = 0x51,
+	MLXSW_TRAP_ID_MTUERROR = 0x52,
+	MLXSW_TRAP_ID_TTLERROR = 0x53,
+	MLXSW_TRAP_ID_OSPF = 0x55,
 	MLXSW_TRAP_ID_IP2ME = 0x5F,
 	MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
 	MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
-- 
cgit v0.10.2


From a94a614fa2bd32848a67f8261228e193beb826ca Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 17 Aug 2016 16:39:33 +0200
Subject: mlxsw: spectrum: Trap loop-backed packets

One of the conditions to generate an ICMP Redirect Message is that "the
packet is being forwarded out the same physical interface that it was
received from" (RFC 1812).

Therefore, we need to be able to trap such packets and let the kernel
decide what to do with them.

For each RIF, enable the loop-back filter, which will raise the LBERROR
trap whenever the ingress RIF equals the egress RIF.

Fixes: 99724c18fc66 ("mlxsw: spectrum: Introduce support for router interfaces")
Reported-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 7ca9201..a1bd36c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3383,6 +3383,15 @@ MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1);
  */
 MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1);
 
+/* reg_ritr_lb_en
+ * Loop-back filter enable for unicast packets.
+ * If the flag is set then loop-back filter for unicast packets is
+ * implemented on the RIF. Multicast packets are always subject to
+ * loop-back filtering.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, lb_en, 0x04, 24, 1);
+
 /* reg_ritr_virtual_router
  * Virtual router ID associated with the router interface.
  * Access: RW
@@ -3484,6 +3493,7 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
 	mlxsw_reg_ritr_op_set(payload, op);
 	mlxsw_reg_ritr_rif_set(payload, rif);
 	mlxsw_reg_ritr_ipv4_fe_set(payload, 1);
+	mlxsw_reg_ritr_lb_en_set(payload, 1);
 	mlxsw_reg_ritr_mtu_set(payload, mtu);
 	mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 6b69c8a..8137daa 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2661,6 +2661,11 @@ static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = {
 	{
 		.func = mlxsw_sp_rx_listener_func,
 		.local_port = MLXSW_PORT_DONT_CARE,
+		.trap_id = MLXSW_TRAP_ID_LBERROR,
+	},
+	{
+		.func = mlxsw_sp_rx_listener_func,
+		.local_port = MLXSW_PORT_DONT_CARE,
 		.trap_id = MLXSW_TRAP_ID_OSPF,
 	},
 	{
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 9508e0a..ed8e301 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -58,6 +58,7 @@ enum {
 	MLXSW_TRAP_ID_ARPUC = 0x51,
 	MLXSW_TRAP_ID_MTUERROR = 0x52,
 	MLXSW_TRAP_ID_TTLERROR = 0x53,
+	MLXSW_TRAP_ID_LBERROR = 0x54,
 	MLXSW_TRAP_ID_OSPF = 0x55,
 	MLXSW_TRAP_ID_IP2ME = 0x5F,
 	MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
-- 
cgit v0.10.2


From 0e7df1a290abbcf3ecf697bbbbd4549c9a113db0 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 17 Aug 2016 16:39:34 +0200
Subject: mlxsw: reg: Fix missing op field fill-up

Ralue pack function needs to set op, otherwise it is 0 for add always.

Fixes: d5a1c749d22 ("mlxsw: reg: Add Router Algorithmic LPM Unicast Entry Register definition")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index a1bd36c..1721098 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -4010,6 +4010,7 @@ static inline void mlxsw_reg_ralue_pack(char *payload,
 {
 	MLXSW_REG_ZERO(ralue, payload);
 	mlxsw_reg_ralue_protocol_set(payload, protocol);
+	mlxsw_reg_ralue_op_set(payload, op);
 	mlxsw_reg_ralue_virtual_router_set(payload, virtual_router);
 	mlxsw_reg_ralue_prefix_len_set(payload, prefix_len);
 	mlxsw_reg_ralue_entry_type_set(payload,
-- 
cgit v0.10.2


From 0583272d91f0f4e21f1eb666786286863185be7e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 17 Aug 2016 16:39:35 +0200
Subject: mlxsw: spectrum: Add missing rollbacks in error path

While going over the code I noticed we are missing two rollbacks in the
port's creation error path. Add them and adjust the place of one of them
in the port's removal sequence so that both are symmetric.

Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 8137daa..1fe9fbd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2206,6 +2206,7 @@ err_core_port_init:
 	unregister_netdev(dev);
 err_register_netdev:
 	mlxsw_sp->ports[local_port] = NULL;
+	mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
 	mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port);
 err_port_pvid_vport_create:
 	mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
@@ -2215,6 +2216,7 @@ err_port_buffers_init:
 err_port_admin_status_set:
 err_port_mtu_set:
 err_port_speed_by_width_set:
+	mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
 err_port_swid_set:
 err_port_system_port_mapping_set:
 err_dev_addr_init:
@@ -2237,9 +2239,9 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 	mlxsw_core_port_fini(&mlxsw_sp_port->core_port);
 	unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
 	mlxsw_sp->ports[local_port] = NULL;
+	mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
 	mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port);
 	mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
-	mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
 	mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
 	mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port);
 	free_percpu(mlxsw_sp_port->pcpu_stats);
-- 
cgit v0.10.2


From 8168287b5dfac9227a549ed87f5e111b7005e8a4 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 17 Aug 2016 16:39:36 +0200
Subject: mlxsw: spectrum: Unmap 802.1Q FID before destroying it

Before destroying the 802.1Q FID we should first remove the VID-to-FID
mapping. This makes mlxsw_sp_fid_destroy() symmetric with regards to
mlxsw_sp_fid_create().

Fixes: 14d39461b3f4 ("mlxsw: spectrum: Use per-FID struct for the VLAN-aware bridge")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index b5e864d..d1b59cd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -450,6 +450,8 @@ void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f)
 
 	kfree(f);
 
+	mlxsw_sp_fid_map(mlxsw_sp, fid, false);
+
 	mlxsw_sp_fid_op(mlxsw_sp, fid, false);
 }
 
-- 
cgit v0.10.2


From 9ffcc3725f096e9f0d985f738b0e44214cd72d93 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 17 Aug 2016 16:39:37 +0200
Subject: mlxsw: spectrum: Allow packets to be trapped from any PG

When packets enter the device they are classified to a priority group
(PG) buffer based on their PCP value. After their egress port and
traffic class are determined they are moved to the switch's shared
buffer and await transmission, if:

(Ingress{Port}.Usage < Thres && Ingress{Port,PG}.Usage < Thres &&
 Egress{Port}.Usage < Thres && Egress{Port,TC}.Usage < Thres)
||
(Ingress{Port}.Usage < Min || Ingress{Port,PG} < Min ||
 Egress{Port}.Usage < Min || Egress{Port,TC}.Usage < Min)

Packets scheduled to transmission through CPU port (trapped to CPU) use
traffic class 7, which has a zero maximum and minimum quotas. However,
when such packets arrive from PG 0 they are admitted to the shared
buffer as PG 0 has a non-zero minimum quota.

Allow all packets to be trapped to the CPU - regardless of the PG they
were classified to - by assigning a 10KB minimum quota for CPU port and
TC7.

Fixes: 8e8dfe9fdf06 ("mlxsw: spectrum: Add IEEE 802.1Qaz ETS support")
Reported-by: Tamir Winetroub <tamirw@mellanox.com>
Tested-by: Tamir Winetroub <tamirw@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index 074cdda..237418a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -330,7 +330,7 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = {
 	MLXSW_SP_CPU_PORT_SB_CM,
 	MLXSW_SP_CPU_PORT_SB_CM,
 	MLXSW_SP_CPU_PORT_SB_CM,
-	MLXSW_SP_CPU_PORT_SB_CM,
+	MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 0, 0),
 	MLXSW_SP_CPU_PORT_SB_CM,
 	MLXSW_SP_CPU_PORT_SB_CM,
 	MLXSW_SP_CPU_PORT_SB_CM,
-- 
cgit v0.10.2


From f07fed82ad7994cc4d779ee79bdf7a46848c4b8f Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Sat, 13 Aug 2016 22:34:56 -0700
Subject: net_sched: remove the leftover cleanup_a()

After refactoring tc_action into tcf_common, we no
longer need to cleanup temporary "actions" in list,
they are permanently stored in the hashtable.

Fixes: a85a970af265 ("net_sched: move tc_action into tcf_common")
Reported-by: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index e4a5f26..cce6986 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -754,16 +754,6 @@ err_out:
 	return ERR_PTR(err);
 }
 
-static void cleanup_a(struct list_head *actions)
-{
-	struct tc_action *a, *tmp;
-
-	list_for_each_entry_safe(a, tmp, actions, list) {
-		list_del(&a->list);
-		kfree(a);
-	}
-}
-
 static int tca_action_flush(struct net *net, struct nlattr *nla,
 			    struct nlmsghdr *n, u32 portid)
 {
@@ -905,7 +895,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 		return ret;
 	}
 err:
-	cleanup_a(&actions);
+	tcf_action_destroy(&actions, 0);
 	return ret;
 }
 
@@ -942,15 +932,9 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 
 	ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions);
 	if (ret)
-		goto done;
+		return ret;
 
-	/* dump then free all the actions after update; inserted policy
-	 * stays intact
-	 */
-	ret = tcf_add_notify(net, n, &actions, portid);
-	cleanup_a(&actions);
-done:
-	return ret;
+	return tcf_add_notify(net, n, &actions, portid);
 }
 
 static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
-- 
cgit v0.10.2


From 824a7e8863b3eb283343f891b11a782b4ec0d0de Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Sat, 13 Aug 2016 22:34:57 -0700
Subject: net_sched: remove an unnecessary list_del()

This list_del() for tc action is not needed actually,
because we only use this list to chain bulk operations,
therefore should not be carried for latter operations.

Fixes: ec0595cc4495 ("net_sched: get rid of struct tcf_common")
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index cce6986..b4c7be3 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -64,7 +64,6 @@ int __tcf_hash_release(struct tc_action *p, bool bind, bool strict)
 		if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
 			if (p->ops->cleanup)
 				p->ops->cleanup(p, bind);
-			list_del(&p->list);
 			tcf_hash_destroy(p->hinfo, p);
 			ret = ACT_P_DELETED;
 		}
-- 
cgit v0.10.2


From 0c23c3e705691cfb99c94f2760df2b456fe45194 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Sat, 13 Aug 2016 22:34:58 -0700
Subject: net_sched: fix a typo in tc_for_each_action()

It is harmless because all users pass 'a' to this macro.

Fixes: 00175aec941e ("net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef")
Cc: Amir Vadai <amir@vadai.me>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 41e6a24..f53ee9d 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -193,7 +193,7 @@ int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
 	(list_empty(&(_exts)->actions))
 
 #define tc_for_each_action(_a, _exts) \
-	list_for_each_entry(a, &(_exts)->actions, list)
+	list_for_each_entry(_a, &(_exts)->actions, list)
 
 #define tc_single_action(_exts) \
 	(list_is_singular(&(_exts)->actions))
-- 
cgit v0.10.2


From 2734437ef3c2943090d0914bf91caa6b30451615 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Sat, 13 Aug 2016 22:34:59 -0700
Subject: net_sched: move tc offload macros to pkt_cls.h

struct tcf_exts belongs to filters, should not be visible
to plain tc actions.

Cc: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/act_api.h b/include/net/act_api.h
index f53ee9d..870332f 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -189,30 +189,17 @@ int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
 
-#define tc_no_actions(_exts) \
-	(list_empty(&(_exts)->actions))
-
-#define tc_for_each_action(_a, _exts) \
-	list_for_each_entry(_a, &(_exts)->actions, list)
-
-#define tc_single_action(_exts) \
-	(list_is_singular(&(_exts)->actions))
+#endif /* CONFIG_NET_CLS_ACT */
 
 static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
 					   u64 packets, u64 lastuse)
 {
+#ifdef CONFIG_NET_CLS_ACT
 	if (!a->ops->stats_update)
 		return;
 
 	a->ops->stats_update(a, bytes, packets, lastuse);
+#endif
 }
 
-#else /* CONFIG_NET_CLS_ACT */
-
-#define tc_no_actions(_exts) true
-#define tc_for_each_action(_a, _exts) while ((void)(_a), 0)
-#define tc_single_action(_exts) false
-#define tcf_action_stats_update(a, bytes, packets, lastuse)
-
-#endif /* CONFIG_NET_CLS_ACT */
 #endif
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 6f8d653..00dd5c4 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -130,6 +130,25 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
 	return 0;
 }
 
+#ifdef CONFIG_NET_CLS_ACT
+
+#define tc_no_actions(_exts) \
+	(list_empty(&(_exts)->actions))
+
+#define tc_for_each_action(_a, _exts) \
+	list_for_each_entry(_a, &(_exts)->actions, list)
+
+#define tc_single_action(_exts) \
+	(list_is_singular(&(_exts)->actions))
+
+#else /* CONFIG_NET_CLS_ACT */
+
+#define tc_no_actions(_exts) true
+#define tc_for_each_action(_a, _exts) while ((void)(_a), 0)
+#define tc_single_action(_exts) false
+
+#endif /* CONFIG_NET_CLS_ACT */
+
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
 		      struct nlattr **tb, struct nlattr *rate_tlv,
 		      struct tcf_exts *exts, bool ovr);
-- 
cgit v0.10.2


From 22dc13c837c33207548c8ee5116b64e2930a6e23 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Sat, 13 Aug 2016 22:35:00 -0700
Subject: net_sched: convert tcf_exts from list to pointer array

As pointed out by Jamal, an action could be shared by
multiple filters, so we can't use list to chain them
any more after we get rid of the original tc_action.
Instead, we could just save pointers to these actions
in tcf_exts, since they are refcount'ed, so convert
the list to an array of pointers.

The "ugly" part is the action API still accepts list
as a parameter, I just introduce a helper function to
convert the array of pointers to a list, instead of
relying on the C99 feature to iterate the array.

Fixes: a85a970af265 ("net_sched: move tc_action into tcf_common")
Reported-by: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index ee57a89..b4f0374 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8396,12 +8396,14 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter,
 			    struct tcf_exts *exts, u64 *action, u8 *queue)
 {
 	const struct tc_action *a;
+	LIST_HEAD(actions);
 	int err;
 
 	if (tc_no_actions(exts))
 		return -EINVAL;
 
-	tc_for_each_action(a, exts) {
+	tcf_exts_to_list(exts, &actions);
+	list_for_each_entry(a, &actions, list) {
 
 		/* Drop action */
 		if (is_tcf_gact_shot(a)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 0f19b01..dc8b1cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -318,6 +318,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 				u32 *action, u32 *flow_tag)
 {
 	const struct tc_action *a;
+	LIST_HEAD(actions);
 
 	if (tc_no_actions(exts))
 		return -EINVAL;
@@ -325,7 +326,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 	*flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
 	*action = 0;
 
-	tc_for_each_action(a, exts) {
+	tcf_exts_to_list(exts, &actions);
+	list_for_each_entry(a, &actions, list) {
 		/* Only support a single action per rule */
 		if (*action)
 			return -EINVAL;
@@ -362,13 +364,15 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 				u32 *action, u32 *dest_vport)
 {
 	const struct tc_action *a;
+	LIST_HEAD(actions);
 
 	if (tc_no_actions(exts))
 		return -EINVAL;
 
 	*action = 0;
 
-	tc_for_each_action(a, exts) {
+	tcf_exts_to_list(exts, &actions);
+	list_for_each_entry(a, &actions, list) {
 		/* Only support a single action per rule */
 		if (*action)
 			return -EINVAL;
@@ -503,6 +507,7 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
 	struct mlx5e_tc_flow *flow;
 	struct tc_action *a;
 	struct mlx5_fc *counter;
+	LIST_HEAD(actions);
 	u64 bytes;
 	u64 packets;
 	u64 lastuse;
@@ -518,7 +523,8 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
 
 	mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
 
-	tc_for_each_action(a, f->exts)
+	tcf_exts_to_list(f->exts, &actions);
+	list_for_each_entry(a, &actions, list)
 		tcf_action_stats_update(a, bytes, packets, lastuse);
 
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 1fe9fbd..1f81689 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1121,6 +1121,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 					  bool ingress)
 {
 	const struct tc_action *a;
+	LIST_HEAD(actions);
 	int err;
 
 	if (!tc_single_action(cls->exts)) {
@@ -1128,7 +1129,8 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 		return -ENOTSUPP;
 	}
 
-	tc_for_each_action(a, cls->exts) {
+	tcf_exts_to_list(cls->exts, &actions);
+	list_for_each_entry(a, &actions, list) {
 		if (!is_tcf_mirred_mirror(a) || protocol != htons(ETH_P_ALL))
 			return -ENOTSUPP;
 
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 870332f..82f3c91 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -176,8 +176,8 @@ int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops);
 int tcf_unregister_action(struct tc_action_ops *a,
 			  struct pernet_operations *ops);
 int tcf_action_destroy(struct list_head *actions, int bind);
-int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
-		    struct tcf_result *res);
+int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
+		    int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct nlattr *nla,
 				  struct nlattr *est, char *n, int ovr,
 				  int bind, struct list_head *);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 00dd5c4..c99508d 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -59,7 +59,8 @@ tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r)
 struct tcf_exts {
 #ifdef CONFIG_NET_CLS_ACT
 	__u32	type; /* for backward compat(TCA_OLD_COMPAT) */
-	struct list_head actions;
+	int nr_actions;
+	struct tc_action **actions;
 #endif
 	/* Map to export classifier specific extension TLV types to the
 	 * generic extensions API. Unsupported extensions must be set to 0.
@@ -72,7 +73,10 @@ static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	exts->type = 0;
-	INIT_LIST_HEAD(&exts->actions);
+	exts->nr_actions = 0;
+	exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
+				GFP_KERNEL);
+	WARN_ON(!exts->actions); /* TODO: propagate the error to callers */
 #endif
 	exts->action = action;
 	exts->police = police;
@@ -89,7 +93,7 @@ static inline int
 tcf_exts_is_predicative(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	return !list_empty(&exts->actions);
+	return exts->nr_actions;
 #else
 	return 0;
 #endif
@@ -108,6 +112,20 @@ tcf_exts_is_available(struct tcf_exts *exts)
 	return tcf_exts_is_predicative(exts);
 }
 
+static inline void tcf_exts_to_list(const struct tcf_exts *exts,
+				    struct list_head *actions)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	int i;
+
+	for (i = 0; i < exts->nr_actions; i++) {
+		struct tc_action *a = exts->actions[i];
+
+		list_add(&a->list, actions);
+	}
+#endif
+}
+
 /**
  * tcf_exts_exec - execute tc filter extensions
  * @skb: socket buffer
@@ -124,27 +142,21 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
 	       struct tcf_result *res)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (!list_empty(&exts->actions))
-		return tcf_action_exec(skb, &exts->actions, res);
+	if (exts->nr_actions)
+		return tcf_action_exec(skb, exts->actions, exts->nr_actions,
+				       res);
 #endif
 	return 0;
 }
 
 #ifdef CONFIG_NET_CLS_ACT
 
-#define tc_no_actions(_exts) \
-	(list_empty(&(_exts)->actions))
-
-#define tc_for_each_action(_a, _exts) \
-	list_for_each_entry(_a, &(_exts)->actions, list)
-
-#define tc_single_action(_exts) \
-	(list_is_singular(&(_exts)->actions))
+#define tc_no_actions(_exts)  ((_exts)->nr_actions == 0)
+#define tc_single_action(_exts) ((_exts)->nr_actions == 1)
 
 #else /* CONFIG_NET_CLS_ACT */
 
 #define tc_no_actions(_exts) true
-#define tc_for_each_action(_a, _exts) while ((void)(_a), 0)
 #define tc_single_action(_exts) false
 
 #endif /* CONFIG_NET_CLS_ACT */
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index b4c7be3..d09d068 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -420,18 +420,19 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
 	return res;
 }
 
-int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
-		    struct tcf_result *res)
+int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
+		    int nr_actions, struct tcf_result *res)
 {
-	const struct tc_action *a;
-	int ret = -1;
+	int ret = -1, i;
 
 	if (skb->tc_verd & TC_NCLS) {
 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
 		ret = TC_ACT_OK;
 		goto exec_done;
 	}
-	list_for_each_entry(a, actions, list) {
+	for (i = 0; i < nr_actions; i++) {
+		const struct tc_action *a = actions[i];
+
 repeat:
 		ret = a->ops->act(skb, a, res);
 		if (ret == TC_ACT_REPEAT)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 843a716..a7c5645 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -541,8 +541,12 @@ out:
 void tcf_exts_destroy(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	tcf_action_destroy(&exts->actions, TCA_ACT_UNBIND);
-	INIT_LIST_HEAD(&exts->actions);
+	LIST_HEAD(actions);
+
+	tcf_exts_to_list(exts, &actions);
+	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
+	kfree(exts->actions);
+	exts->nr_actions = 0;
 #endif
 }
 EXPORT_SYMBOL(tcf_exts_destroy);
@@ -554,7 +558,6 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 	{
 		struct tc_action *act;
 
-		INIT_LIST_HEAD(&exts->actions);
 		if (exts->police && tb[exts->police]) {
 			act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
 						"police", ovr,
@@ -563,14 +566,20 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 				return PTR_ERR(act);
 
 			act->type = exts->type = TCA_OLD_COMPAT;
-			list_add(&act->list, &exts->actions);
+			exts->actions[0] = act;
+			exts->nr_actions = 1;
 		} else if (exts->action && tb[exts->action]) {
-			int err;
+			LIST_HEAD(actions);
+			int err, i = 0;
+
 			err = tcf_action_init(net, tb[exts->action], rate_tlv,
 					      NULL, ovr,
-					      TCA_ACT_BIND, &exts->actions);
+					      TCA_ACT_BIND, &actions);
 			if (err)
 				return err;
+			list_for_each_entry(act, &actions, list)
+				exts->actions[i++] = act;
+			exts->nr_actions = i;
 		}
 	}
 #else
@@ -587,37 +596,49 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
 		     struct tcf_exts *src)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	LIST_HEAD(tmp);
+	struct tcf_exts old = *dst;
+
 	tcf_tree_lock(tp);
-	list_splice_init(&dst->actions, &tmp);
-	list_splice(&src->actions, &dst->actions);
+	dst->nr_actions = src->nr_actions;
+	dst->actions = src->actions;
 	dst->type = src->type;
 	tcf_tree_unlock(tp);
-	tcf_action_destroy(&tmp, TCA_ACT_UNBIND);
+
+	tcf_exts_destroy(&old);
 #endif
 }
 EXPORT_SYMBOL(tcf_exts_change);
 
-#define tcf_exts_first_act(ext)					\
-	list_first_entry_or_null(&(exts)->actions,		\
-				 struct tc_action, list)
+#ifdef CONFIG_NET_CLS_ACT
+static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
+{
+	if (exts->nr_actions == 0)
+		return NULL;
+	else
+		return exts->actions[0];
+}
+#endif
 
 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	struct nlattr *nest;
 
-	if (exts->action && !list_empty(&exts->actions)) {
+	if (exts->action && exts->nr_actions) {
 		/*
 		 * again for backward compatible mode - we want
 		 * to work with both old and new modes of entering
 		 * tc data even if iproute2  was newer - jhs
 		 */
 		if (exts->type != TCA_OLD_COMPAT) {
+			LIST_HEAD(actions);
+
 			nest = nla_nest_start(skb, exts->action);
 			if (nest == NULL)
 				goto nla_put_failure;
-			if (tcf_action_dump(skb, &exts->actions, 0, 0) < 0)
+
+			tcf_exts_to_list(exts, &actions);
+			if (tcf_action_dump(skb, &actions, 0, 0) < 0)
 				goto nla_put_failure;
 			nla_nest_end(skb, nest);
 		} else if (exts->police) {
-- 
cgit v0.10.2


From 0852e455238f8550fa92b1e40355eb2c6805787e Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Sat, 13 Aug 2016 22:35:01 -0700
Subject: net_sched: unify the init logic for act_police

Jamal reported a crash when we create a police action
with a specific index, this is because the init logic
is not correct, we should always create one for this
case. Just unify the logic with other tc actions.

Fixes: a03e6fe56971 ("act_police: fix a crash during removal")
Reported-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index b3c7e97..259352d 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -125,6 +125,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 	struct tcf_police *police;
 	struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
 	struct tc_action_net *tn = net_generic(net, police_net_id);
+	bool exists = false;
 	int size;
 
 	if (nla == NULL)
@@ -139,24 +140,24 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 	size = nla_len(tb[TCA_POLICE_TBF]);
 	if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
 		return -EINVAL;
+
 	parm = nla_data(tb[TCA_POLICE_TBF]);
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
 
-	if (parm->index) {
-		if (tcf_hash_check(tn, parm->index, a, bind)) {
-			if (ovr)
-				goto override;
-			/* not replacing */
-			return -EEXIST;
-		}
-	} else {
+	if (!exists) {
 		ret = tcf_hash_create(tn, parm->index, NULL, a,
 				      &act_police_ops, bind, false);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
+	} else {
+		tcf_hash_release(*a, bind);
+		if (!ovr)
+			return -EEXIST;
 	}
 
-override:
 	police = to_police(*a);
 	if (parm->rate.rate) {
 		err = -ENOMEM;
-- 
cgit v0.10.2


From b5ac851885accffe0485aea2805df8f2d49c95a8 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Sat, 13 Aug 2016 22:35:02 -0700
Subject: net_sched: allow flushing tc police actions

The act_police uses its own code to walk the
action hashtable, which leads to that we could
not flush standalone tc police actions, so just
switch to tcf_generic_walker() like other actions.

(Joint work from Roman and Cong.)

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 259352d..8a3be1d 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -63,49 +63,8 @@ static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
 				 const struct tc_action_ops *ops)
 {
 	struct tc_action_net *tn = net_generic(net, police_net_id);
-	struct tcf_hashinfo *hinfo = tn->hinfo;
-	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
-	struct nlattr *nest;
-
-	spin_lock_bh(&hinfo->lock);
-
-	s_i = cb->args[0];
-
-	for (i = 0; i < (POL_TAB_MASK + 1); i++) {
-		struct hlist_head *head;
-		struct tc_action *p;
-
-		head = &hinfo->htab[tcf_hash(i, POL_TAB_MASK)];
-
-		hlist_for_each_entry_rcu(p, head, tcfa_head) {
-			index++;
-			if (index < s_i)
-				continue;
-			nest = nla_nest_start(skb, index);
-			if (nest == NULL)
-				goto nla_put_failure;
-			if (type == RTM_DELACTION)
-				err = tcf_action_dump_1(skb, p, 0, 1);
-			else
-				err = tcf_action_dump_1(skb, p, 0, 0);
-			if (err < 0) {
-				index--;
-				nla_nest_cancel(skb, nest);
-				goto done;
-			}
-			nla_nest_end(skb, nest);
-			n_i++;
-		}
-	}
-done:
-	spin_unlock_bh(&hinfo->lock);
-	if (n_i)
-		cb->args[0] += n_i;
-	return n_i;
 
-nla_put_failure:
-	nla_nest_cancel(skb, nest);
-	goto done;
+	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
 static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
-- 
cgit v0.10.2