From f38b7c2547537a8219d273e20eb3b88e6fc6b764 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 28 Sep 2016 14:38:36 +0000 Subject: wlcore: sdio: drop kfree for memory allocated with devm_kzalloc It's not necessary to free memory allocated with devm_kzalloc and using kfree leads to a double free. Fixes: d776fc86b82f ("wlcore: sdio: Populate config firmware data") Signed-off-by: Wei Yongjun Acked-by: Tony Lindgren Signed-off-by: Kalle Valo diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index a6e94b1..47fe7f9 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -391,7 +391,6 @@ static void wl1271_remove(struct sdio_func *func) pm_runtime_get_noresume(&func->dev); platform_device_unregister(glue->core); - kfree(glue); } #ifdef CONFIG_PM -- cgit v0.10.2 From 1e54134ccad00f76ddf00f3e77db3dc8fdefbb47 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Thu, 29 Sep 2016 15:40:54 -0400 Subject: rtl8xxxu: Fix memory leak in handling rxdesc16 packets A device running without RX package aggregation could return more data in the USB packet than the actual network packet. In this case we could would clone the skb but then determine that that there was no packet to handle and exit without freeing the cloned skb first. This has so far only been observed with 8188eu devices, but could affect others. Signed-off-by: Jes Sorensen Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Kalle Valo diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index b2d7f6e..a96ff17 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5197,7 +5197,12 @@ int rtl8xxxu_parse_rxdesc16(struct rtl8xxxu_priv *priv, struct sk_buff *skb) pkt_offset = roundup(pkt_len + drvinfo_sz + desc_shift + sizeof(struct rtl8xxxu_rxdesc16), 128); - if (pkt_cnt > 1) + /* + * Only clone the skb if there's enough data at the end to + * at least cover the rx descriptor + */ + if (pkt_cnt > 1 && + urb_len > (pkt_offset + sizeof(struct rtl8xxxu_rxdesc16))) next_skb = skb_clone(skb, GFP_ATOMIC); rx_status = IEEE80211_SKB_RXCB(skb); -- cgit v0.10.2 From 8a55698f2f29d227825173420d7b99b9277ca88c Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Thu, 29 Sep 2016 15:40:55 -0400 Subject: rtl8xxxu: Fix big-endian problem reporting mactime The full RX descriptor is converted so converting tsfl again would return it to it's original endian value. Signed-off-by: Jes Sorensen Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Kalle Valo diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index 10166289..08d587a 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -238,7 +238,7 @@ struct rtl8xxxu_rxdesc16 { u32 pattern1match:1; u32 pattern0match:1; #endif - __le32 tsfl; + u32 tsfl; #if 0 u32 bassn:12; u32 bavld:1; @@ -368,7 +368,7 @@ struct rtl8xxxu_rxdesc24 { u32 ldcp:1; u32 splcp:1; #endif - __le32 tsfl; + u32 tsfl; }; struct rtl8xxxu_txdesc32 { diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index a96ff17..a5e6ec2 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5220,7 +5220,7 @@ int rtl8xxxu_parse_rxdesc16(struct rtl8xxxu_priv *priv, struct sk_buff *skb) rtl8xxxu_rx_parse_phystats(priv, rx_status, phy_stats, rx_desc->rxmcs); - rx_status->mactime = le32_to_cpu(rx_desc->tsfl); + rx_status->mactime = rx_desc->tsfl; rx_status->flag |= RX_FLAG_MACTIME_START; if (!rx_desc->swdec) @@ -5290,7 +5290,7 @@ int rtl8xxxu_parse_rxdesc24(struct rtl8xxxu_priv *priv, struct sk_buff *skb) rtl8xxxu_rx_parse_phystats(priv, rx_status, phy_stats, rx_desc->rxmcs); - rx_status->mactime = le32_to_cpu(rx_desc->tsfl); + rx_status->mactime = rx_desc->tsfl; rx_status->flag |= RX_FLAG_MACTIME_START; if (!rx_desc->swdec) -- cgit v0.10.2 From ab05e5ec81c76f3a852919c22984c885edd2414a Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 30 Sep 2016 19:35:17 -0400 Subject: rtl8xxxu: Fix rtl8723bu driver reload issue The generic disable_rf() function clears bits 22 and 23 in REG_RX_WAIT_CCA, however we did not re-enable them again in rtl8723b_enable_rf() This resolves the problem for me with 8723bu devices not working again after reloading the driver. Signed-off-by: Jes Sorensen Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Kalle Valo diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c index 6c086b5..02b8ddd 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c @@ -1498,6 +1498,10 @@ static void rtl8723b_enable_rf(struct rtl8xxxu_priv *priv) u32 val32; u8 val8; + val32 = rtl8xxxu_read32(priv, REG_RX_WAIT_CCA); + val32 |= (BIT(22) | BIT(23)); + rtl8xxxu_write32(priv, REG_RX_WAIT_CCA, val32); + /* * No indication anywhere as to what 0x0790 does. The 2 antenna * vendor code preserves bits 6-7 here. -- cgit v0.10.2 From 29d5e6fbd65be89dcd32f070fc45ee0e3b2f82b6 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 30 Sep 2016 19:35:18 -0400 Subject: rtl8xxxu: Fix rtl8192eu driver reload issue The 8192eu suffered from two issues when reloading the driver. The same problems as with the 8723bu where REG_RX_WAIT_CCA bits 22 and 23 didn't get set in rtl8192e_enable_rf(). In addition it also seems prone to issues when setting REG_RF_CTRL to 0 intead of just disabling the RF_ENABLE bit. Similar to what was causing issues with the 8188eu. With this patch I can successfully reload the driver and reassociate to an APi with an 8192eu dongle. Signed-off-by: Jes Sorensen Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Kalle Valo diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c index df54d27..a793fed 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c @@ -1461,7 +1461,9 @@ static int rtl8192eu_active_to_emu(struct rtl8xxxu_priv *priv) int count, ret = 0; /* Turn off RF */ - rtl8xxxu_write8(priv, REG_RF_CTRL, 0); + val8 = rtl8xxxu_read8(priv, REG_RF_CTRL); + val8 &= ~RF_ENABLE; + rtl8xxxu_write8(priv, REG_RF_CTRL, val8); /* Switch DPDT_SEL_P output from register 0x65[2] */ val8 = rtl8xxxu_read8(priv, REG_LEDCFG2); @@ -1593,6 +1595,10 @@ static void rtl8192e_enable_rf(struct rtl8xxxu_priv *priv) u32 val32; u8 val8; + val32 = rtl8xxxu_read32(priv, REG_RX_WAIT_CCA); + val32 |= (BIT(22) | BIT(23)); + rtl8xxxu_write32(priv, REG_RX_WAIT_CCA, val32); + val8 = rtl8xxxu_read8(priv, REG_GPIO_MUXCFG); val8 |= BIT(5); rtl8xxxu_write8(priv, REG_GPIO_MUXCFG, val8); -- cgit v0.10.2 From ea720935cf6686f72def9d322298bf7e9bd53377 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Oct 2016 10:14:42 +0200 Subject: mac80211: discard multicast and 4-addr A-MSDUs In mac80211, multicast A-MSDUs are accepted in many cases that they shouldn't be accepted in: * drop A-MSDUs with a multicast A1 (RA), as required by the spec in 9.11 (802.11-2012 version) * drop A-MSDUs with a 4-addr header, since the fourth address can't actually be useful for them; unless 4-address frame format is actually requested, even though the fourth address is still not useful in this case, but ignored Accepting the first case, in particular, is very problematic since it allows anyone else with possession of a GTK to send unicast frames encapsulated in a multicast A-MSDU, even when the AP has client isolation enabled. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6175db3..3ac2f1c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2308,16 +2308,22 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) if (!(status->rx_flags & IEEE80211_RX_AMSDU)) return RX_CONTINUE; - if (ieee80211_has_a4(hdr->frame_control) && - rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - !rx->sdata->u.vlan.sta) - return RX_DROP_UNUSABLE; + if (unlikely(ieee80211_has_a4(hdr->frame_control))) { + switch (rx->sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + if (!rx->sdata->u.vlan.sta) + return RX_DROP_UNUSABLE; + break; + case NL80211_IFTYPE_STATION: + if (!rx->sdata->u.mgd.use_4addr) + return RX_DROP_UNUSABLE; + break; + default: + return RX_DROP_UNUSABLE; + } + } - if (is_multicast_ether_addr(hdr->addr1) && - ((rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - rx->sdata->u.vlan.sta) || - (rx->sdata->vif.type == NL80211_IFTYPE_STATION && - rx->sdata->u.mgd.use_4addr))) + if (is_multicast_ether_addr(hdr->addr1)) return RX_DROP_UNUSABLE; skb->dev = dev; -- cgit v0.10.2 From 7f6990c830f3e8d703f13d7963acf51936c52ad2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Oct 2016 15:29:49 +0200 Subject: cfg80211: let ieee80211_amsdu_to_8023s() take only header-less SKB There's only a single case where has_80211_header is passed as true, which is in mac80211. Given that there's only simple code that needs to be done before calling it, export that function from cfg80211 instead and let mac80211 call it itself. Signed-off-by: Johannes Berg diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index 9448012..e9f462e 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -45,7 +45,7 @@ static int mwifiex_11n_dispatch_amsdu_pkt(struct mwifiex_private *priv, skb_trim(skb, le16_to_cpu(local_rx_pd->rx_pkt_length)); ieee80211_amsdu_to_8023s(skb, &list, priv->curr_addr, - priv->wdev.iftype, 0, false); + priv->wdev.iftype, 0); while (!skb_queue_empty(&list)) { struct rx_packet_hdr *rx_hdr; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fe78f02..f372ead 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4040,14 +4040,29 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); */ /** + * ieee80211_data_to_8023_exthdr - convert an 802.11 data frame to 802.3 + * @skb: the 802.11 data frame + * @ehdr: pointer to a &struct ethhdr that will get the header, instead + * of it being pushed into the SKB + * @addr: the device MAC address + * @iftype: the virtual interface type + * Return: 0 on success. Non-zero on error. + */ +int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, + const u8 *addr, enum nl80211_iftype iftype); + +/** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 * @skb: the 802.11 data frame * @addr: the device MAC address * @iftype: the virtual interface type * Return: 0 on success. Non-zero on error. */ -int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype); +static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, + enum nl80211_iftype iftype) +{ + return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype); +} /** * ieee80211_data_from_8023 - convert an 802.3 frame to 802.11 @@ -4065,22 +4080,20 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, /** * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame * - * Decode an IEEE 802.11n A-MSDU frame and convert it to a list of - * 802.3 frames. The @list will be empty if the decode fails. The - * @skb is consumed after the function returns. + * Decode an IEEE 802.11 A-MSDU and convert it to a list of 802.3 frames. + * The @list will be empty if the decode fails. The @skb must be fully + * header-less before being passed in here; it is freed in this function. * - * @skb: The input IEEE 802.11n A-MSDU frame. + * @skb: The input A-MSDU frame without any headers. * @list: The output list of 802.3 frames. It must be allocated and * initialized by by the caller. * @addr: The device MAC address. * @iftype: The device interface type. * @extra_headroom: The hardware extra headroom for SKBs in the @list. - * @has_80211_header: Set it true if SKB is with IEEE 802.11 header. */ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, - const unsigned int extra_headroom, - bool has_80211_header); + const unsigned int extra_headroom); /** * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3ac2f1c..de2d75f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2298,6 +2298,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) __le16 fc = hdr->frame_control; struct sk_buff_head frame_list; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); + struct ethhdr ethhdr; if (unlikely(!ieee80211_is_data(fc))) return RX_CONTINUE; @@ -2329,9 +2330,14 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) skb->dev = dev; __skb_queue_head_init(&frame_list); + if (ieee80211_data_to_8023_exthdr(skb, ðhdr, + rx->sdata->vif.addr, + rx->sdata->vif.type)) + return RX_DROP_UNUSABLE; + ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, rx->sdata->vif.type, - rx->local->hw.extra_tx_headroom, true); + rx->local->hw.extra_tx_headroom); while (!skb_queue_empty(&frame_list)) { rx->skb = __skb_dequeue(&frame_list); diff --git a/net/wireless/util.c b/net/wireless/util.c index 8edce22..e36ede8 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -420,8 +420,8 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) } EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen); -static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr, - const u8 *addr, enum nl80211_iftype iftype) +int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, + const u8 *addr, enum nl80211_iftype iftype) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct { @@ -525,13 +525,7 @@ static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr, return 0; } - -int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype) -{ - return __ieee80211_data_to_8023(skb, NULL, addr, iftype); -} -EXPORT_SYMBOL(ieee80211_data_to_8023); +EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr); int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype, @@ -745,25 +739,18 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen, void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, - const unsigned int extra_headroom, - bool has_80211_header) + const unsigned int extra_headroom) { unsigned int hlen = ALIGN(extra_headroom, 4); struct sk_buff *frame = NULL; u16 ethertype; u8 *payload; - int offset = 0, remaining, err; + int offset = 0, remaining; struct ethhdr eth; bool reuse_frag = skb->head_frag && !skb_has_frag_list(skb); bool reuse_skb = false; bool last = false; - if (has_80211_header) { - err = __ieee80211_data_to_8023(skb, ð, addr, iftype); - if (err) - goto out; - } - while (!last) { unsigned int subframe_len; int len; @@ -819,7 +806,6 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, purge: __skb_queue_purge(list); - out: dev_kfree_skb(skb); } EXPORT_SYMBOL(ieee80211_amsdu_to_8023s); -- cgit v0.10.2 From 8b935ee2ea17db720d70f6420f77f594c0c93f75 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Oct 2016 16:17:01 +0200 Subject: cfg80211: add ability to check DA/SA in A-MSDU decapsulation We should not accept arbitrary DA/SA inside A-MSDUs, it could be used to circumvent protections, like allowing a station to send frames and make them seem to come from somewhere else. Add the necessary infrastructure in cfg80211 to allow such checks, in further patches we'll start using them. Signed-off-by: Johannes Berg diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index e9f462e..274dd5a 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -45,7 +45,7 @@ static int mwifiex_11n_dispatch_amsdu_pkt(struct mwifiex_private *priv, skb_trim(skb, le16_to_cpu(local_rx_pd->rx_pkt_length)); ieee80211_amsdu_to_8023s(skb, &list, priv->curr_addr, - priv->wdev.iftype, 0); + priv->wdev.iftype, 0, NULL, NULL); while (!skb_queue_empty(&list)) { struct rx_packet_hdr *rx_hdr; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f372ead..7df600c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4090,10 +4090,13 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, * @addr: The device MAC address. * @iftype: The device interface type. * @extra_headroom: The hardware extra headroom for SKBs in the @list. + * @check_da: DA to check in the inner ethernet header, or NULL + * @check_sa: SA to check in the inner ethernet header, or NULL */ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, - const unsigned int extra_headroom); + const unsigned int extra_headroom, + const u8 *check_da, const u8 *check_sa); /** * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index de2d75f..cf53fe1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2337,7 +2337,8 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, rx->sdata->vif.type, - rx->local->hw.extra_tx_headroom); + rx->local->hw.extra_tx_headroom, + NULL, NULL); while (!skb_queue_empty(&frame_list)) { rx->skb = __skb_dequeue(&frame_list); diff --git a/net/wireless/util.c b/net/wireless/util.c index e36ede8..5ea12af 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -739,7 +739,8 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen, void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, - const unsigned int extra_headroom) + const unsigned int extra_headroom, + const u8 *check_da, const u8 *check_sa) { unsigned int hlen = ALIGN(extra_headroom, 4); struct sk_buff *frame = NULL; @@ -767,8 +768,17 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, goto purge; offset += sizeof(struct ethhdr); - /* reuse skb for the last subframe */ last = remaining <= subframe_len + padding; + + /* FIXME: should we really accept multicast DA? */ + if ((check_da && !is_multicast_ether_addr(eth.h_dest) && + !ether_addr_equal(check_da, eth.h_dest)) || + (check_sa && !ether_addr_equal(check_sa, eth.h_source))) { + offset += len + padding; + continue; + } + + /* reuse skb for the last subframe */ if (!skb_is_nonlinear(skb) && !reuse_frag && last) { skb_pull(skb, offset); frame = skb; -- cgit v0.10.2 From e2b5227faaf33109d9d00f8a99f7b4f45a9d9c9f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Oct 2016 16:42:06 +0200 Subject: mac80211: validate DA/SA during A-MSDU decapsulation As pointed out by Michael Braun, we don't check inner L2 addresses during A-MSDU decapsulation, leading to the possibility that, for example, a station associated to an AP sends frames as though they came from somewhere else. Fix this problem by letting cfg80211 validate the addresses, as indicated by passing in the ones that need to be validated. Reported-by: Michael Braun Signed-off-by: Johannes Berg diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index cf53fe1..a47bbc9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2299,6 +2299,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) struct sk_buff_head frame_list; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); struct ethhdr ethhdr; + const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source; if (unlikely(!ieee80211_is_data(fc))) return RX_CONTINUE; @@ -2322,6 +2323,23 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) default: return RX_DROP_UNUSABLE; } + check_da = NULL; + check_sa = NULL; + } else switch (rx->sdata->vif.type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + check_da = NULL; + break; + case NL80211_IFTYPE_STATION: + if (!rx->sta || + !test_sta_flag(rx->sta, WLAN_STA_TDLS_PEER)) + check_sa = NULL; + break; + case NL80211_IFTYPE_MESH_POINT: + check_sa = NULL; + break; + default: + break; } if (is_multicast_ether_addr(hdr->addr1)) @@ -2338,7 +2356,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, rx->sdata->vif.type, rx->local->hw.extra_tx_headroom, - NULL, NULL); + check_da, check_sa); while (!skb_queue_empty(&frame_list)) { rx->skb = __skb_dequeue(&frame_list); -- cgit v0.10.2 From 1d4de2e222b41006007d7dbfce2abfe448217e49 Mon Sep 17 00:00:00 2001 From: Michael Braun Date: Mon, 3 Oct 2016 13:14:15 +0200 Subject: mac80211: fix CMD_FRAME for AP_VLAN When using IEEE 802.11r FT OVER-DS roaming with AP_VLAN, hostapd needs to send out a frame using CMD_FRAME for a station assigned to an AP_VLAN interface. Right now, the userspace needs to give the exact AP_VLAN interface index for CMD_FRAME; hostapd does not do this. Additionally, userspace cannot use GET_STATION to query the AP_VLAN ifidx, as while GET_STATION finds stations assigned to AP_VLAN even if the AP iface is queried, it does not return AP_VLAN ifidx (it returns the queried one). This breaks IEEE 802.11r over_ds with vlans, as the reply frame does not get out. This patch fixes this by using get_sta_bss for CMD_FRAME. Signed-off-by: Michael Braun Signed-off-by: Johannes Berg diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index c3f610b..eede5c6 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -820,7 +820,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) break; rcu_read_lock(); - sta = sta_info_get(sdata, mgmt->da); + sta = sta_info_get_bss(sdata, mgmt->da); rcu_read_unlock(); if (!sta) return -ENOLINK; -- cgit v0.10.2 From 5e2bd93b8fcac8c0cf83f189d996831fb21f2db3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20de=20Bretagne?= Date: Sun, 9 Oct 2016 15:51:05 +0200 Subject: Bluetooth: hci_bcm: Fix autosuspend PM for Lenovo ThinkPad 8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ACPI table for BCM2E55 of Lenovo ThinkPad 8 is not correct. Set correctly IRQ polarity for this device, fixing the issue of bluetooth never resuming after autosuspend PM. Signed-off-by: Jérôme de Bretagne Signed-off-by: Marcel Holtmann diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 5ccb90e..8f6c23c 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -643,6 +643,14 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = { }, .driver_data = &acpi_active_low, }, + { /* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */ + .ident = "Lenovo ThinkPad 8", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 8"), + }, + .driver_data = &acpi_active_low, + }, { } }; -- cgit v0.10.2 From 233c9edcca0136f2b9b304fd4f32e9bb6ce88ea9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Oct 2016 09:09:44 +0300 Subject: afs: unmapping the wrong buffer We switched from kmap_atomic() to kmap() so the kunmap() calls need to be updated to match. Fixes: d001648ec7cf ('rxrpc: Don't expose skbs to in-kernel users [ver #2]') Signed-off-by: Dan Carpenter Signed-off-by: David Howells diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 96f4d76..31c616a 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -364,7 +364,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) buffer = kmap(page); ret = afs_extract_data(call, buffer, call->count, true); - kunmap(buffer); + kunmap(page); if (ret < 0) return ret; } @@ -397,7 +397,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) page = call->reply3; buffer = kmap(page); memset(buffer + call->count, 0, PAGE_SIZE - call->count); - kunmap(buffer); + kunmap(page); } _leave(" = 0 [done]"); -- cgit v0.10.2 From 54fde4234579d3b1311b3ed1a1e95526a7cfdcd7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 Oct 2016 08:39:52 +0100 Subject: rxrpc: Fix checker warning by not passing always-zero value to ERR_PTR() Fix the following checker warning: net/rxrpc/call_object.c:279 rxrpc_new_client_call() warn: passing zero to 'ERR_PTR' where a value that's always zero is passed to ERR_PTR() so that it can be passed to a tracepoint in an auxiliary pointer field. Just pass NULL instead to the tracepoint. Fixes: a84a46d73050 ("rxrpc: Add some additional call tracing") Reported-by: Dan Carpenter Signed-off-by: David Howells diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 4353a29..1ed18d8 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -276,7 +276,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, goto error; trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage), - here, ERR_PTR(ret)); + here, NULL); spin_lock_bh(&call->conn->params.peer->lock); hlist_add_head(&call->error_link, -- cgit v0.10.2 From 07096f612fdf2bb5578cd1fecb2884bdbb1cde42 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 Oct 2016 08:43:17 +0100 Subject: rxrpc: Fix checking of error from ip6_route_output() ip6_route_output() doesn't return a negative error when it fails, rather the ->error field of the returned dst_entry struct needs to be checked. Reported-by: Dan Carpenter Fixes: 75b54cb57ca3 ("rxrpc: Add IPv6 support") Signed-off-by: David Howells diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 941b724..862eea6 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -193,8 +193,8 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) fl6->fl6_dport = htons(7001); fl6->fl6_sport = htons(7000); dst = ip6_route_output(&init_net, NULL, fl6); - if (IS_ERR(dst)) { - _leave(" [route err %ld]", PTR_ERR(dst)); + if (dst->error) { + _leave(" [route err %d]", dst->error); return; } break; -- cgit v0.10.2 From cf4747d7535a936105f0abe8d8109d3fe339162b Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 12 Oct 2016 13:54:04 -0500 Subject: rtlwifi: Fix regression caused by commit d86e64768859 In commit d86e64768859 ("rtlwifi: rtl818x: constify local structures"), the configuration struct for most of the drivers was changed to be constant. The problem is that five of the modified drivers need to be able to update the firmware name based on the exact model of the card. As the file names were stored in one of the members of that struct, these drivers would fail with a kernel BUG splat when they tried to update the firmware name. Rather than reverting the previous commit, I used a suggestion by Johannes Berg and made the firmware file name pointers be local to the routines that update the software variables. The configuration struct of rtl8192cu, which was not touched in the previous patch, is now constantfied. Fixes: d86e64768859 ("rtlwifi: rtl818x: constify local structures") Suggested-by: Johannes Berg Signed-off-by: Larry Finger Cc: Stable # 4.8 Cc: Julia Lawall Signed-off-by: Kalle Valo diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index f95760c..8e7f23c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -111,7 +111,7 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context, if (!err) goto found_alt; } - pr_err("Firmware %s not available\n", rtlpriv->cfg->fw_name); + pr_err("Selected firmware is not available\n"); rtlpriv->max_fw_size = 0; return; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c index e7b11b4..f361808 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c @@ -86,6 +86,7 @@ int rtl88e_init_sw_vars(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); u8 tid; + char *fw_name; rtl8188ee_bt_reg_init(hw); rtlpriv->dm.dm_initialgain_enable = 1; @@ -169,10 +170,10 @@ int rtl88e_init_sw_vars(struct ieee80211_hw *hw) return 1; } - rtlpriv->cfg->fw_name = "rtlwifi/rtl8188efw.bin"; + fw_name = "rtlwifi/rtl8188efw.bin"; rtlpriv->max_fw_size = 0x8000; - pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name); - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + pr_info("Using firmware %s\n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); if (err) { @@ -284,7 +285,6 @@ static const struct rtl_hal_cfg rtl88ee_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl88e_pci", - .fw_name = "rtlwifi/rtl8188efw.bin", .ops = &rtl8188ee_hal_ops, .mod_params = &rtl88ee_mod_params, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c index 87aa209..8b6e37c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c @@ -96,6 +96,7 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw)); + char *fw_name = "rtlwifi/rtl8192cfwU.bin"; rtl8192ce_bt_reg_init(hw); @@ -167,15 +168,12 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw) } /* request fw */ - if (IS_VENDOR_UMC_A_CUT(rtlhal->version) && - !IS_92C_SERIAL(rtlhal->version)) - rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cfwU.bin"; - else if (IS_81XXC_VENDOR_UMC_B_CUT(rtlhal->version)) - rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cfwU_B.bin"; + if (IS_81XXC_VENDOR_UMC_B_CUT(rtlhal->version)) + fw_name = "rtlwifi/rtl8192cfwU_B.bin"; rtlpriv->max_fw_size = 0x4000; - pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name); - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + pr_info("Using firmware %s\n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); if (err) { @@ -262,7 +260,6 @@ static const struct rtl_hal_cfg rtl92ce_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl92c_pci", - .fw_name = "rtlwifi/rtl8192cfw.bin", .ops = &rtl8192ce_hal_ops, .mod_params = &rtl92ce_mod_params, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c index 7c6f7f0..f953320 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c @@ -59,6 +59,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); int err; + char *fw_name; rtlpriv->dm.dm_initialgain_enable = true; rtlpriv->dm.dm_flag = 0; @@ -77,18 +78,18 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw) } if (IS_VENDOR_UMC_A_CUT(rtlpriv->rtlhal.version) && !IS_92C_SERIAL(rtlpriv->rtlhal.version)) { - rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cufw_A.bin"; + fw_name = "rtlwifi/rtl8192cufw_A.bin"; } else if (IS_81XXC_VENDOR_UMC_B_CUT(rtlpriv->rtlhal.version)) { - rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cufw_B.bin"; + fw_name = "rtlwifi/rtl8192cufw_B.bin"; } else { - rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cufw_TMSC.bin"; + fw_name = "rtlwifi/rtl8192cufw_TMSC.bin"; } /* provide name of alternative file */ rtlpriv->cfg->alt_fw_name = "rtlwifi/rtl8192cufw.bin"; - pr_info("Loading firmware %s\n", rtlpriv->cfg->fw_name); + pr_info("Loading firmware %s\n", fw_name); rtlpriv->max_fw_size = 0x4000; err = request_firmware_nowait(THIS_MODULE, 1, - rtlpriv->cfg->fw_name, rtlpriv->io.dev, + fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); return err; } @@ -187,7 +188,6 @@ static struct rtl_hal_usbint_cfg rtl92cu_interface_cfg = { static struct rtl_hal_cfg rtl92cu_hal_cfg = { .name = "rtl92c_usb", - .fw_name = "rtlwifi/rtl8192cufw.bin", .ops = &rtl8192cu_hal_ops, .mod_params = &rtl92cu_mod_params, .usb_interface_cfg = &rtl92cu_interface_cfg, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c index 0538a4d..1ebfee1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c @@ -92,6 +92,7 @@ static int rtl92d_init_sw_vars(struct ieee80211_hw *hw) u8 tid; struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); + char *fw_name = "rtlwifi/rtl8192defw.bin"; rtlpriv->dm.dm_initialgain_enable = true; rtlpriv->dm.dm_flag = 0; @@ -181,10 +182,10 @@ static int rtl92d_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->max_fw_size = 0x8000; pr_info("Driver for Realtek RTL8192DE WLAN interface\n"); - pr_info("Loading firmware file %s\n", rtlpriv->cfg->fw_name); + pr_info("Loading firmware file %s\n", fw_name); /* request fw */ - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); if (err) { @@ -266,7 +267,6 @@ static const struct rtl_hal_cfg rtl92de_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8192de", - .fw_name = "rtlwifi/rtl8192defw.bin", .ops = &rtl8192de_hal_ops, .mod_params = &rtl92de_mod_params, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c index ac299cb..46b605de3 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c @@ -91,6 +91,7 @@ int rtl92ee_init_sw_vars(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); int err = 0; + char *fw_name; rtl92ee_bt_reg_init(hw); rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; @@ -170,11 +171,11 @@ int rtl92ee_init_sw_vars(struct ieee80211_hw *hw) } /* request fw */ - rtlpriv->cfg->fw_name = "rtlwifi/rtl8192eefw.bin"; + fw_name = "rtlwifi/rtl8192eefw.bin"; rtlpriv->max_fw_size = 0x8000; - pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name); - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + pr_info("Using firmware %s\n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); if (err) { @@ -266,7 +267,6 @@ static const struct rtl_hal_cfg rtl92ee_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl92ee_pci", - .fw_name = "rtlwifi/rtl8192eefw.bin", .ops = &rtl8192ee_hal_ops, .mod_params = &rtl92ee_mod_params, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c index 5e8e02d..3e1eaea 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c @@ -89,12 +89,13 @@ static void rtl92se_fw_cb(const struct firmware *firmware, void *context) struct ieee80211_hw *hw = context; struct rtl_priv *rtlpriv = rtl_priv(hw); struct rt_firmware *pfirmware = NULL; + char *fw_name = "rtlwifi/rtl8192sefw.bin"; RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, "Firmware callback routine entered!\n"); complete(&rtlpriv->firmware_loading_complete); if (!firmware) { - pr_err("Firmware %s not available\n", rtlpriv->cfg->fw_name); + pr_err("Firmware %s not available\n", fw_name); rtlpriv->max_fw_size = 0; return; } @@ -117,6 +118,7 @@ static int rtl92s_init_sw_vars(struct ieee80211_hw *hw) struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); int err = 0; u16 earlyrxthreshold = 7; + char *fw_name = "rtlwifi/rtl8192sefw.bin"; rtlpriv->dm.dm_initialgain_enable = true; rtlpriv->dm.dm_flag = 0; @@ -214,9 +216,9 @@ static int rtl92s_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->max_fw_size = RTL8190_MAX_FIRMWARE_CODE_SIZE*2 + sizeof(struct fw_hdr); pr_info("Driver for Realtek RTL8192SE/RTL8191SE\n" - "Loading firmware %s\n", rtlpriv->cfg->fw_name); + "Loading firmware %s\n", fw_name); /* request fw */ - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl92se_fw_cb); if (err) { @@ -310,7 +312,6 @@ static const struct rtl_hal_cfg rtl92se_hal_cfg = { .bar_id = 1, .write_readback = false, .name = "rtl92s_pci", - .fw_name = "rtlwifi/rtl8192sefw.bin", .ops = &rtl8192se_hal_ops, .mod_params = &rtl92se_mod_params, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c index 89c828a..c51a9e8 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c @@ -94,6 +94,7 @@ int rtl8723e_init_sw_vars(struct ieee80211_hw *hw) struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw)); int err = 0; + char *fw_name = "rtlwifi/rtl8723fw.bin"; rtl8723e_bt_reg_init(hw); @@ -176,14 +177,12 @@ int rtl8723e_init_sw_vars(struct ieee80211_hw *hw) return 1; } - if (IS_VENDOR_8723_A_CUT(rtlhal->version)) - rtlpriv->cfg->fw_name = "rtlwifi/rtl8723fw.bin"; - else if (IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version)) - rtlpriv->cfg->fw_name = "rtlwifi/rtl8723fw_B.bin"; + if (IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version)) + fw_name = "rtlwifi/rtl8723fw_B.bin"; rtlpriv->max_fw_size = 0x6000; - pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name); - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + pr_info("Using firmware %s\n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); if (err) { @@ -280,7 +279,6 @@ static const struct rtl_hal_cfg rtl8723e_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8723e_pci", - .fw_name = "rtlwifi/rtl8723efw.bin", .ops = &rtl8723e_hal_ops, .mod_params = &rtl8723e_mod_params, .maps[SYS_ISO_CTRL] = REG_SYS_ISO_CTRL, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c index 20b53f0..847644d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c @@ -91,6 +91,7 @@ int rtl8723be_init_sw_vars(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); + char *fw_name = "rtlwifi/rtl8723befw.bin"; rtl8723be_bt_reg_init(hw); rtlpriv->btcoexist.btc_ops = rtl_btc_get_ops_pointer(); @@ -184,8 +185,8 @@ int rtl8723be_init_sw_vars(struct ieee80211_hw *hw) } rtlpriv->max_fw_size = 0x8000; - pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name); - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + pr_info("Using firmware %s\n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); if (err) { @@ -280,7 +281,6 @@ static const struct rtl_hal_cfg rtl8723be_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8723be_pci", - .fw_name = "rtlwifi/rtl8723befw.bin", .ops = &rtl8723be_hal_ops, .mod_params = &rtl8723be_mod_params, .maps[SYS_ISO_CTRL] = REG_SYS_ISO_CTRL, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c index 22f687b1..297938e 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c @@ -93,6 +93,7 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw)); + char *fw_name, *wowlan_fw_name; rtl8821ae_bt_reg_init(hw); rtlpriv->btcoexist.btc_ops = rtl_btc_get_ops_pointer(); @@ -203,17 +204,17 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) } if (rtlhal->hw_type == HARDWARE_TYPE_RTL8812AE) { - rtlpriv->cfg->fw_name = "rtlwifi/rtl8812aefw.bin"; - rtlpriv->cfg->wowlan_fw_name = "rtlwifi/rtl8812aefw_wowlan.bin"; + fw_name = "rtlwifi/rtl8812aefw.bin"; + wowlan_fw_name = "rtlwifi/rtl8812aefw_wowlan.bin"; } else { - rtlpriv->cfg->fw_name = "rtlwifi/rtl8821aefw.bin"; - rtlpriv->cfg->wowlan_fw_name = "rtlwifi/rtl8821aefw_wowlan.bin"; + fw_name = "rtlwifi/rtl8821aefw.bin"; + wowlan_fw_name = "rtlwifi/rtl8821aefw_wowlan.bin"; } rtlpriv->max_fw_size = 0x8000; /*load normal firmware*/ - pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name); - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + pr_info("Using firmware %s\n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); if (err) { @@ -222,9 +223,9 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) return 1; } /*load wowlan firmware*/ - pr_info("Using firmware %s\n", rtlpriv->cfg->wowlan_fw_name); + pr_info("Using firmware %s\n", wowlan_fw_name); err = request_firmware_nowait(THIS_MODULE, 1, - rtlpriv->cfg->wowlan_fw_name, + wowlan_fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_wowlan_fw_cb); if (err) { @@ -320,7 +321,6 @@ static const struct rtl_hal_cfg rtl8821ae_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8821ae_pci", - .fw_name = "rtlwifi/rtl8821aefw.bin", .ops = &rtl8821ae_hal_ops, .mod_params = &rtl8821ae_mod_params, .maps[SYS_ISO_CTRL] = REG_SYS_ISO_CTRL, diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index 595f7d5..dafe486 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -2278,9 +2278,7 @@ struct rtl_hal_cfg { u8 bar_id; bool write_readback; char *name; - char *fw_name; char *alt_fw_name; - char *wowlan_fw_name; struct rtl_hal_ops *ops; struct rtl_mod_params *mod_params; struct rtl_hal_usbint_cfg *usb_interface_cfg; -- cgit v0.10.2 From f67b107d4ceddcf7aa65b706aaaf50d68edb52a6 Mon Sep 17 00:00:00 2001 From: Marty Faltesek Date: Mon, 10 Oct 2016 19:00:04 +0300 Subject: ath10k: cache calibration data when the core is stopped Commit 0b8e3c4ca29f ("ath10k: move cal data len to hw_params") broke retrieving the calibration data from cal_data debugfs file. The length of file was always zero. The reason is: static ssize_t ath10k_debug_cal_data_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ath10k *ar = file->private_data; void *buf = file->private_data; This is obviously bogus, private_data cannot contain both struct ath10k and the buffer. Fix it by caching calibration data to ar->debug.cal_data. This also allows it to be accessed when the device is not active (interface is down). The cal_data buffer is fixed size because during the first firmware probe we don't yet know what will be the lenght of the calibration data. It was simplest just to use a fixed length. There's a WARN_ON() in ath10k_debug_cal_data_fetch() if the buffer is too small. Tested with qca988x and firmware 10.2.4.70.56. Reported-by: Nikolay Martynov Fixes: 0b8e3c4ca29f ("ath10k: move cal data len to hw_params") Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Marty Faltesek [kvalo@qca.qualcomm.com: improve commit log and minor other changes] Signed-off-by: Kalle Valo diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index dda49af..521f1c5 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -450,6 +450,7 @@ struct ath10k_debug { u32 pktlog_filter; u32 reg_addr; u32 nf_cal_period; + void *cal_data; struct ath10k_fw_crash_data *fw_crash_data; }; diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c index 832da6e..82a4c67 100644 --- a/drivers/net/wireless/ath/ath10k/debug.c +++ b/drivers/net/wireless/ath/ath10k/debug.c @@ -30,6 +30,8 @@ /* ms */ #define ATH10K_DEBUG_HTT_STATS_INTERVAL 1000 +#define ATH10K_DEBUG_CAL_DATA_LEN 12064 + #define ATH10K_FW_CRASH_DUMP_VERSION 1 /** @@ -1451,56 +1453,51 @@ static const struct file_operations fops_fw_dbglog = { .llseek = default_llseek, }; -static int ath10k_debug_cal_data_open(struct inode *inode, struct file *file) +static int ath10k_debug_cal_data_fetch(struct ath10k *ar) { - struct ath10k *ar = inode->i_private; - void *buf; u32 hi_addr; __le32 addr; int ret; - mutex_lock(&ar->conf_mutex); - - if (ar->state != ATH10K_STATE_ON && - ar->state != ATH10K_STATE_UTF) { - ret = -ENETDOWN; - goto err; - } + lockdep_assert_held(&ar->conf_mutex); - buf = vmalloc(ar->hw_params.cal_data_len); - if (!buf) { - ret = -ENOMEM; - goto err; - } + if (WARN_ON(ar->hw_params.cal_data_len > ATH10K_DEBUG_CAL_DATA_LEN)) + return -EINVAL; hi_addr = host_interest_item_address(HI_ITEM(hi_board_data)); ret = ath10k_hif_diag_read(ar, hi_addr, &addr, sizeof(addr)); if (ret) { - ath10k_warn(ar, "failed to read hi_board_data address: %d\n", ret); - goto err_vfree; + ath10k_warn(ar, "failed to read hi_board_data address: %d\n", + ret); + return ret; } - ret = ath10k_hif_diag_read(ar, le32_to_cpu(addr), buf, + ret = ath10k_hif_diag_read(ar, le32_to_cpu(addr), ar->debug.cal_data, ar->hw_params.cal_data_len); if (ret) { ath10k_warn(ar, "failed to read calibration data: %d\n", ret); - goto err_vfree; + return ret; } - file->private_data = buf; + return 0; +} - mutex_unlock(&ar->conf_mutex); +static int ath10k_debug_cal_data_open(struct inode *inode, struct file *file) +{ + struct ath10k *ar = inode->i_private; - return 0; + mutex_lock(&ar->conf_mutex); -err_vfree: - vfree(buf); + if (ar->state == ATH10K_STATE_ON || + ar->state == ATH10K_STATE_UTF) { + ath10k_debug_cal_data_fetch(ar); + } -err: + file->private_data = ar; mutex_unlock(&ar->conf_mutex); - return ret; + return 0; } static ssize_t ath10k_debug_cal_data_read(struct file *file, @@ -1508,18 +1505,16 @@ static ssize_t ath10k_debug_cal_data_read(struct file *file, size_t count, loff_t *ppos) { struct ath10k *ar = file->private_data; - void *buf = file->private_data; - return simple_read_from_buffer(user_buf, count, ppos, - buf, ar->hw_params.cal_data_len); -} + mutex_lock(&ar->conf_mutex); -static int ath10k_debug_cal_data_release(struct inode *inode, - struct file *file) -{ - vfree(file->private_data); + count = simple_read_from_buffer(user_buf, count, ppos, + ar->debug.cal_data, + ar->hw_params.cal_data_len); - return 0; + mutex_unlock(&ar->conf_mutex); + + return count; } static ssize_t ath10k_write_ani_enable(struct file *file, @@ -1580,7 +1575,6 @@ static const struct file_operations fops_ani_enable = { static const struct file_operations fops_cal_data = { .open = ath10k_debug_cal_data_open, .read = ath10k_debug_cal_data_read, - .release = ath10k_debug_cal_data_release, .owner = THIS_MODULE, .llseek = default_llseek, }; @@ -1932,6 +1926,8 @@ void ath10k_debug_stop(struct ath10k *ar) { lockdep_assert_held(&ar->conf_mutex); + ath10k_debug_cal_data_fetch(ar); + /* Must not use _sync to avoid deadlock, we do that in * ath10k_debug_destroy(). The check for htt_stats_mask is to avoid * warning from del_timer(). */ @@ -2344,6 +2340,10 @@ int ath10k_debug_create(struct ath10k *ar) if (!ar->debug.fw_crash_data) return -ENOMEM; + ar->debug.cal_data = vzalloc(ATH10K_DEBUG_CAL_DATA_LEN); + if (!ar->debug.cal_data) + return -ENOMEM; + INIT_LIST_HEAD(&ar->debug.fw_stats.pdevs); INIT_LIST_HEAD(&ar->debug.fw_stats.vdevs); INIT_LIST_HEAD(&ar->debug.fw_stats.peers); @@ -2357,6 +2357,9 @@ void ath10k_debug_destroy(struct ath10k *ar) vfree(ar->debug.fw_crash_data); ar->debug.fw_crash_data = NULL; + vfree(ar->debug.cal_data); + ar->debug.cal_data = NULL; + ath10k_debug_fw_stats_reset(ar); kfree(ar->debug.tpc_stats); -- cgit v0.10.2 From 304e5ac118cc351eb047b6c433a89e13ea7259cf Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 11 Oct 2016 19:46:49 +0200 Subject: Revert "ath9k_hw: implement temperature compensation support for AR9003+" This reverts commit 171f6402e4aa ("ath9k_hw: implement temperature compensation support for AR9003+"). Some users report that this commit causes a regression in performance under some conditions. Fixes: 171f6402e4aa ("ath9k_hw: implement temperature compensation support for AR9003+") Cc: # 4.8 Signed-off-by: Felix Fietkau [kvalo@qca.qualcomm.com: improve commit log] Signed-off-by: Kalle Valo diff --git a/drivers/net/wireless/ath/ath9k/ar9003_calib.c b/drivers/net/wireless/ath/ath9k/ar9003_calib.c index b6f064a..7e27a06 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_calib.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_calib.c @@ -33,7 +33,6 @@ struct coeff { enum ar9003_cal_types { IQ_MISMATCH_CAL = BIT(0), - TEMP_COMP_CAL = BIT(1), }; static void ar9003_hw_setup_calibration(struct ath_hw *ah, @@ -59,12 +58,6 @@ static void ar9003_hw_setup_calibration(struct ath_hw *ah, /* Kick-off cal */ REG_SET_BIT(ah, AR_PHY_TIMING4, AR_PHY_TIMING4_DO_CAL); break; - case TEMP_COMP_CAL: - ath_dbg(common, CALIBRATE, - "starting Temperature Compensation Calibration\n"); - REG_SET_BIT(ah, AR_CH0_THERM, AR_CH0_THERM_LOCAL); - REG_SET_BIT(ah, AR_CH0_THERM, AR_CH0_THERM_START); - break; default: ath_err(common, "Invalid calibration type\n"); break; @@ -93,8 +86,7 @@ static bool ar9003_hw_per_calibration(struct ath_hw *ah, /* * Accumulate cal measures for active chains */ - if (cur_caldata->calCollect) - cur_caldata->calCollect(ah); + cur_caldata->calCollect(ah); ah->cal_samples++; if (ah->cal_samples >= cur_caldata->calNumSamples) { @@ -107,8 +99,7 @@ static bool ar9003_hw_per_calibration(struct ath_hw *ah, /* * Process accumulated data */ - if (cur_caldata->calPostProc) - cur_caldata->calPostProc(ah, numChains); + cur_caldata->calPostProc(ah, numChains); /* Calibration has finished. */ caldata->CalValid |= cur_caldata->calType; @@ -323,16 +314,9 @@ static const struct ath9k_percal_data iq_cal_single_sample = { ar9003_hw_iqcalibrate }; -static const struct ath9k_percal_data temp_cal_single_sample = { - TEMP_COMP_CAL, - MIN_CAL_SAMPLES, - PER_MAX_LOG_COUNT, -}; - static void ar9003_hw_init_cal_settings(struct ath_hw *ah) { ah->iq_caldata.calData = &iq_cal_single_sample; - ah->temp_caldata.calData = &temp_cal_single_sample; if (AR_SREV_9300_20_OR_LATER(ah)) { ah->enabled_cals |= TX_IQ_CAL; @@ -340,7 +324,7 @@ static void ar9003_hw_init_cal_settings(struct ath_hw *ah) ah->enabled_cals |= TX_IQ_ON_AGC_CAL; } - ah->supp_cals = IQ_MISMATCH_CAL | TEMP_COMP_CAL; + ah->supp_cals = IQ_MISMATCH_CAL; } #define OFF_UPPER_LT 24 @@ -1399,9 +1383,6 @@ static void ar9003_hw_init_cal_common(struct ath_hw *ah) INIT_CAL(&ah->iq_caldata); INSERT_CAL(ah, &ah->iq_caldata); - INIT_CAL(&ah->temp_caldata); - INSERT_CAL(ah, &ah->temp_caldata); - /* Initialize current pointer to first element in list */ ah->cal_list_curr = ah->cal_list; diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index 2a5d3ad..9cbca12 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -830,7 +830,6 @@ struct ath_hw { /* Calibration */ u32 supp_cals; struct ath9k_cal_list iq_caldata; - struct ath9k_cal_list temp_caldata; struct ath9k_cal_list adcgain_caldata; struct ath9k_cal_list adcdc_caldata; struct ath9k_cal_list *cal_list; -- cgit v0.10.2 From 1ea2643961b0d1b8d0e4a11af5aa69b0f92d0533 Mon Sep 17 00:00:00 2001 From: Adam Williamson Date: Wed, 12 Oct 2016 19:08:40 +0100 Subject: ath6kl: add Dell OEM SDIO I/O for the Venue 8 Pro SDIO ID 0271:0418 Signed-off-by: Alan Cox Bugzilla-ID: https://bugzilla.kernel.org/show_bug.cgi?id=67921 Reviewed-by: Steve deRosier Signed-off-by: Kalle Valo diff --git a/drivers/net/wireless/ath/ath6kl/sdio.c b/drivers/net/wireless/ath/ath6kl/sdio.c index eab0ab9..76eb336 100644 --- a/drivers/net/wireless/ath/ath6kl/sdio.c +++ b/drivers/net/wireless/ath/ath6kl/sdio.c @@ -1401,6 +1401,7 @@ static const struct sdio_device_id ath6kl_sdio_devices[] = { {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x0))}, {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x1))}, {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x2))}, + {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x18))}, {}, }; -- cgit v0.10.2 From 50a2c95381a7d0e453d7bdfde81d0c5f8351ba54 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 Oct 2016 08:27:10 +0100 Subject: afs: call->operation_ID sometimes used as __be32 sometimes as u32 call->operation_ID is sometimes being used as __be32 sometimes is being used as u32. Be consistent and settle on using as u32. Signed-off-by: David Howells operation_ID); + _enter("{CB.OP %u}", call->operation_ID); - _enter("{CB.OP %u}", operation_id); - - switch (operation_id) { + switch (call->operation_ID) { case CBCallBack: call->type = &afs_SRXCBCallBack; return true; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 5497c84..535a38d 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -112,7 +112,7 @@ struct afs_call { bool need_attention; /* T if RxRPC poked us */ u16 service_id; /* RxRPC service ID to call */ __be16 port; /* target UDP port */ - __be32 operation_ID; /* operation ID for an incoming call */ + u32 operation_ID; /* operation ID for an incoming call */ u32 count; /* count for use in unmarshalling */ __be32 tmp; /* place to extract temporary data */ afs_dataversion_t store_version; /* updated version expected from store */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 477928b..25f05a8 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -676,10 +676,11 @@ static int afs_deliver_cm_op_id(struct afs_call *call) ASSERTCMP(call->offset, <, 4); /* the operation ID forms the first four bytes of the request data */ - ret = afs_extract_data(call, &call->operation_ID, 4, true); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; + call->operation_ID = ntohl(call->tmp); call->state = AFS_CALL_AWAIT_REQUEST; call->offset = 0; -- cgit v0.10.2 From 68d00f332e0ba7f60f212be74ede290c9f873bc5 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 11 Oct 2016 22:47:20 +0300 Subject: ip6_tunnel: fix ip6_tnl_lookup The commit ea3dc9601bda ("ip6_tunnel: Add support for wildcard tunnel endpoints.") introduces support for wildcards in tunnels endpoints, but in some rare circumstances ip6_tnl_lookup selects wrong tunnel interface relying only on source or destination address of the packet and not checking presence of wildcard in tunnels endpoints. Later in ip6_tnl_rcv this packets can be dicarded because of difference in ipproto even if fallback device have proper ipproto configuration. This patch adds checks of wildcard endpoint in tunnel avoiding such behavior Fixes: ea3dc9601bda ("ip6_tunnel: Add support for wildcard tunnel endpoints.") Signed-off-by: Vadim Fedorenko Signed-off-by: David S. Miller diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 6a66adb..5692d6b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -157,6 +157,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ hash = HASH(&any, local); for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_any(&t->parms.raddr) && (t->dev->flags & IFF_UP)) return t; } @@ -164,6 +165,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ hash = HASH(remote, &any); for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(remote, &t->parms.raddr) && + ipv6_addr_any(&t->parms.laddr) && (t->dev->flags & IFF_UP)) return t; } -- cgit v0.10.2 From a220445f9f4382c36a53d8ef3e08165fa27f7e2c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 12 Oct 2016 10:10:40 +0200 Subject: ipv6: correctly add local routes when lo goes up The goal of the patch is to fix this scenario: ip link add dummy1 type dummy ip link set dummy1 up ip link set lo down ; ip link set lo up After that sequence, the local route to the link layer address of dummy1 is not there anymore. When the loopback is set down, all local routes are deleted by addrconf_ifdown()/rt6_ifdown(). At this time, the rt6_info entry still exists, because the corresponding idev has a reference on it. After the rcu grace period, dst_rcu_free() is called, and thus ___dst_free(), which will set obsolete to DST_OBSOLETE_DEAD. In this case, init_loopback() is called before dst_rcu_free(), thus obsolete is still sets to something <= 0. So, the function doesn't add the route again. To avoid that race, let's check the rt6 refcnt instead. Fixes: 25fb6ca4ed9c ("net IPv6 : Fix broken IPv6 routing table after loopback down-up") Fixes: a881ae1f625c ("ipv6: don't call addrconf_dst_alloc again when enable lo") Fixes: 33d99113b110 ("ipv6: reallocate addrconf router for ipv6 address when lo device up") Reported-by: Francesco Santoro Reported-by: Samuel Gauthier CC: Balakumaran Kannan CC: Maruthi Thotad CC: Sabrina Dubroca CC: Hannes Frederic Sowa CC: Weilong Chen CC: Gao feng Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d8983e1..9faafe5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3018,7 +3018,7 @@ static void init_loopback(struct net_device *dev) * lo device down, release this obsolete dst and * reallocate a new router for ifa. */ - if (sp_ifa->rt->dst.obsolete > 0) { + if (!atomic_read(&sp_ifa->rt->rt6i_ref)) { ip6_rt_put(sp_ifa->rt); sp_ifa->rt = NULL; } else { -- cgit v0.10.2 From 558c5eb58a5c029745b29558782d528b05aba8a5 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 12 Oct 2016 15:55:40 -0300 Subject: net: wan: slic_ds26522: add SPI device ID table to fix module autoload If the driver is built as a module, module alias information isn't filled so the module won't be autoloaded. Add a SPI device ID table and use the MODULE_DEVICE_TABLE() macro so the information is exported in the module. Before this patch: $ modinfo drivers/net/wan/slic_ds26522.ko | grep alias $ After this patch: $ modinfo drivers/net/wan/slic_ds26522.ko | grep alias alias: spi:ds26522 Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller diff --git a/drivers/net/wan/slic_ds26522.c b/drivers/net/wan/slic_ds26522.c index d06a887..53366a2 100644 --- a/drivers/net/wan/slic_ds26522.c +++ b/drivers/net/wan/slic_ds26522.c @@ -223,6 +223,12 @@ static int slic_ds26522_probe(struct spi_device *spi) return ret; } +static const struct spi_device_id slic_ds26522_id[] = { + { .name = "ds26522" }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(spi, slic_ds26522_id); + static const struct of_device_id slic_ds26522_match[] = { { .compatible = "maxim,ds26522", @@ -239,6 +245,7 @@ static struct spi_driver slic_ds26522_driver = { }, .probe = slic_ds26522_probe, .remove = slic_ds26522_remove, + .id_table = slic_ds26522_id, }; static int __init slic_ds26522_init(void) -- cgit v0.10.2 From 485c9d43382172c1b2ecf42deb4476b5e37e1264 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 12 Oct 2016 15:55:41 -0300 Subject: net: wan: slic_ds26522: Export OF module alias information When the device is registered via OF, the OF table is used to match the driver instead of the SPI device ID table, but the entries in the later are used as aliasses to load the module if the driver was not built-in. This is because the SPI core always reports an SPI module alias instead of an OF one, but that could change so it's better to always export it. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller diff --git a/drivers/net/wan/slic_ds26522.c b/drivers/net/wan/slic_ds26522.c index 53366a2..b776a0a 100644 --- a/drivers/net/wan/slic_ds26522.c +++ b/drivers/net/wan/slic_ds26522.c @@ -235,6 +235,7 @@ static const struct of_device_id slic_ds26522_match[] = { }, {}, }; +MODULE_DEVICE_TABLE(of, slic_ds26522_match); static struct spi_driver slic_ds26522_driver = { .driver = { -- cgit v0.10.2 From 059f01410822b9a37fac0f4da8c2202c2d39e7a5 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 12 Oct 2016 16:05:59 -0300 Subject: net: wan: slic_ds26522: Allow driver to built if COMPILE_TEST is enabled The driver only has runtime but no build time dependency with FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE. So it can be built for testing purposes if the COMPILE_TEST option is enabled. This is useful to have more build coverage and make sure that the driver is not affected by changes that could cause build regressions. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index 33ab334..4e9fe75 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -294,7 +294,7 @@ config FSL_UCC_HDLC config SLIC_DS26522 tristate "Slic Maxim ds26522 card support" depends on SPI - depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE + depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE || COMPILE_TEST help This module initializes and configures the slic maxim card in T1 or E1 mode. -- cgit v0.10.2 From f56f7d2e1cbe6a34dbda177d4d6245d8f8cb94bd Mon Sep 17 00:00:00 2001 From: Alexander Alemayhu Date: Thu, 13 Oct 2016 17:09:51 +0200 Subject: Documentation/networking: update git urls to use https over http This fixes the following errors when trying to clone the urls: Cloning into 'net'... fatal: repository 'http://git.kernel.org/cgit/linux/kernel/git/davem/net.git/' not found Cloning into 'net-next'... fatal: repository 'http://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/' not found Cloning into 'linux'... fatal: repository 'http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/' not found Cloning into 'stable-queue'... fatal: repository 'http://git.kernel.org/cgit/linux/kernel/git/stable/stable-queue.git/' not found Signed-off-by: Alexander Alemayhu Signed-off-by: David S. Miller diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt index 0fe1c6e..a20b2fa 100644 --- a/Documentation/networking/netdev-FAQ.txt +++ b/Documentation/networking/netdev-FAQ.txt @@ -29,8 +29,8 @@ A: There are always two trees (git repositories) in play. Both are driven Linus, and net-next is where the new code goes for the future release. You can find the trees here: - http://git.kernel.org/?p=linux/kernel/git/davem/net.git - http://git.kernel.org/?p=linux/kernel/git/davem/net-next.git + https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git + https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git Q: How often do changes from these trees make it to the mainline Linus tree? @@ -76,7 +76,7 @@ Q: So where are we now in this cycle? A: Load the mainline (Linus) page here: - http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git and note the top of the "tags" section. If it is rc1, it is early in the dev cycle. If it was tagged rc7 a week ago, then a release @@ -123,7 +123,7 @@ A: Normally Greg Kroah-Hartman collects stable commits himself, but It contains the patches which Dave has selected, but not yet handed off to Greg. If Greg already has the patch, then it will be here: - http://git.kernel.org/cgit/linux/kernel/git/stable/stable-queue.git + https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git A quick way to find whether the patch is in this stable-queue is to simply clone the repo, and then git grep the mainline commit ID, e.g. -- cgit v0.10.2 From fc791b6335152c5278dc4a4991bcb2d329f806f9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 13 Oct 2016 18:26:56 +0200 Subject: IB/ipoib: move back IB LL address into the hard header After the commit 9207f9d45b0a ("net: preserve IP control block during GSO segmentation"), the GSO CB and the IPoIB CB conflict. That destroy the IPoIB address information cached there, causing a severe performance regression, as better described here: http://marc.info/?l=linux-kernel&m=146787279825501&w=2 This change moves the data cached by the IPoIB driver from the skb control lock into the IPoIB hard header, as done before the commit 936d7de3d736 ("IPoIB: Stop lying about hard_header_len and use skb->cb to stash LL addresses"). In order to avoid GRO issue, on packet reception, the IPoIB driver stash into the skb a dummy pseudo header, so that the received packets have actually a hard header matching the declared length. To avoid changing the connected mode maximum mtu, the allocated head buffer size is increased by the pseudo header length. After this commit, IPoIB performances are back to pre-regression value. v2 -> v3: rebased v1 -> v2: avoid changing the max mtu, increasing the head buf size Fixes: 9207f9d45b0a ("net: preserve IP control block during GSO segmentation") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 7b8d2d9..da12717 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -63,6 +63,8 @@ enum ipoib_flush_level { enum { IPOIB_ENCAP_LEN = 4, + IPOIB_PSEUDO_LEN = 20, + IPOIB_HARD_LEN = IPOIB_ENCAP_LEN + IPOIB_PSEUDO_LEN, IPOIB_UD_HEAD_SIZE = IB_GRH_BYTES + IPOIB_ENCAP_LEN, IPOIB_UD_RX_SG = 2, /* max buffer needed for 4K mtu */ @@ -134,15 +136,21 @@ struct ipoib_header { u16 reserved; }; -struct ipoib_cb { - struct qdisc_skb_cb qdisc_cb; - u8 hwaddr[INFINIBAND_ALEN]; +struct ipoib_pseudo_header { + u8 hwaddr[INFINIBAND_ALEN]; }; -static inline struct ipoib_cb *ipoib_skb_cb(const struct sk_buff *skb) +static inline void skb_add_pseudo_hdr(struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct ipoib_cb)); - return (struct ipoib_cb *)skb->cb; + char *data = skb_push(skb, IPOIB_PSEUDO_LEN); + + /* + * only the ipoib header is present now, make room for a dummy + * pseudo header and set skb field accordingly + */ + memset(data, 0, IPOIB_PSEUDO_LEN); + skb_reset_mac_header(skb); + skb_pull(skb, IPOIB_HARD_LEN); } /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 4ad297d..339a1ee 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -63,6 +63,8 @@ MODULE_PARM_DESC(cm_data_debug_level, #define IPOIB_CM_RX_DELAY (3 * 256 * HZ) #define IPOIB_CM_RX_UPDATE_MASK (0x3) +#define IPOIB_CM_RX_RESERVE (ALIGN(IPOIB_HARD_LEN, 16) - IPOIB_ENCAP_LEN) + static struct ib_qp_attr ipoib_cm_err_attr = { .qp_state = IB_QPS_ERR }; @@ -146,15 +148,15 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, struct sk_buff *skb; int i; - skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12); + skb = dev_alloc_skb(ALIGN(IPOIB_CM_HEAD_SIZE + IPOIB_PSEUDO_LEN, 16)); if (unlikely(!skb)) return NULL; /* - * IPoIB adds a 4 byte header. So we need 12 more bytes to align the + * IPoIB adds a IPOIB_ENCAP_LEN byte header, this will align the * IP header to a multiple of 16. */ - skb_reserve(skb, 12); + skb_reserve(skb, IPOIB_CM_RX_RESERVE); mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); @@ -624,9 +626,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) if (wc->byte_len < IPOIB_CM_COPYBREAK) { int dlen = wc->byte_len; - small_skb = dev_alloc_skb(dlen + 12); + small_skb = dev_alloc_skb(dlen + IPOIB_CM_RX_RESERVE); if (small_skb) { - skb_reserve(small_skb, 12); + skb_reserve(small_skb, IPOIB_CM_RX_RESERVE); ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0], dlen, DMA_FROM_DEVICE); skb_copy_from_linear_data(skb, small_skb->data, dlen); @@ -663,8 +665,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) copied: skb->protocol = ((struct ipoib_header *) skb->data)->proto; - skb_reset_mac_header(skb); - skb_pull(skb, IPOIB_ENCAP_LEN); + skb_add_pseudo_hdr(skb); ++dev->stats.rx_packets; dev->stats.rx_bytes += skb->len; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index be11d5d..830fecb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -128,16 +128,15 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id) buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); - skb = dev_alloc_skb(buf_size + IPOIB_ENCAP_LEN); + skb = dev_alloc_skb(buf_size + IPOIB_HARD_LEN); if (unlikely(!skb)) return NULL; /* - * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte - * header. So we need 4 more bytes to get to 48 and align the - * IP header to a multiple of 16. + * the IP header will be at IPOIP_HARD_LEN + IB_GRH_BYTES, that is + * 64 bytes aligned */ - skb_reserve(skb, 4); + skb_reserve(skb, sizeof(struct ipoib_pseudo_header)); mapping = priv->rx_ring[id].mapping; mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size, @@ -253,8 +252,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) skb_pull(skb, IB_GRH_BYTES); skb->protocol = ((struct ipoib_header *) skb->data)->proto; - skb_reset_mac_header(skb); - skb_pull(skb, IPOIB_ENCAP_LEN); + skb_add_pseudo_hdr(skb); ++dev->stats.rx_packets; dev->stats.rx_bytes += skb->len; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 5636fc3..b58d9dc 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -925,9 +925,12 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, ipoib_neigh_free(neigh); goto err_drop; } - if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) + if (skb_queue_len(&neigh->queue) < + IPOIB_MAX_PATH_REC_QUEUE) { + /* put pseudoheader back on for next time */ + skb_push(skb, IPOIB_PSEUDO_LEN); __skb_queue_tail(&neigh->queue, skb); - else { + } else { ipoib_warn(priv, "queue length limit %d. Packet drop.\n", skb_queue_len(&neigh->queue)); goto err_drop; @@ -964,7 +967,7 @@ err_drop: } static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, - struct ipoib_cb *cb) + struct ipoib_pseudo_header *phdr) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; @@ -972,16 +975,18 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, spin_lock_irqsave(&priv->lock, flags); - path = __path_find(dev, cb->hwaddr + 4); + path = __path_find(dev, phdr->hwaddr + 4); if (!path || !path->valid) { int new_path = 0; if (!path) { - path = path_rec_create(dev, cb->hwaddr + 4); + path = path_rec_create(dev, phdr->hwaddr + 4); new_path = 1; } if (path) { if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { + /* put pseudoheader back on for next time */ + skb_push(skb, IPOIB_PSEUDO_LEN); __skb_queue_tail(&path->queue, skb); } else { ++dev->stats.tx_dropped; @@ -1009,10 +1014,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, be16_to_cpu(path->pathrec.dlid)); spin_unlock_irqrestore(&priv->lock, flags); - ipoib_send(dev, skb, path->ah, IPOIB_QPN(cb->hwaddr)); + ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr)); return; } else if ((path->query || !path_rec_start(dev, path)) && skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { + /* put pseudoheader back on for next time */ + skb_push(skb, IPOIB_PSEUDO_LEN); __skb_queue_tail(&path->queue, skb); } else { ++dev->stats.tx_dropped; @@ -1026,13 +1033,15 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_neigh *neigh; - struct ipoib_cb *cb = ipoib_skb_cb(skb); + struct ipoib_pseudo_header *phdr; struct ipoib_header *header; unsigned long flags; + phdr = (struct ipoib_pseudo_header *) skb->data; + skb_pull(skb, sizeof(*phdr)); header = (struct ipoib_header *) skb->data; - if (unlikely(cb->hwaddr[4] == 0xff)) { + if (unlikely(phdr->hwaddr[4] == 0xff)) { /* multicast, arrange "if" according to probability */ if ((header->proto != htons(ETH_P_IP)) && (header->proto != htons(ETH_P_IPV6)) && @@ -1045,13 +1054,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } /* Add in the P_Key for multicast*/ - cb->hwaddr[8] = (priv->pkey >> 8) & 0xff; - cb->hwaddr[9] = priv->pkey & 0xff; + phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff; + phdr->hwaddr[9] = priv->pkey & 0xff; - neigh = ipoib_neigh_get(dev, cb->hwaddr); + neigh = ipoib_neigh_get(dev, phdr->hwaddr); if (likely(neigh)) goto send_using_neigh; - ipoib_mcast_send(dev, cb->hwaddr, skb); + ipoib_mcast_send(dev, phdr->hwaddr, skb); return NETDEV_TX_OK; } @@ -1060,16 +1069,16 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) case htons(ETH_P_IP): case htons(ETH_P_IPV6): case htons(ETH_P_TIPC): - neigh = ipoib_neigh_get(dev, cb->hwaddr); + neigh = ipoib_neigh_get(dev, phdr->hwaddr); if (unlikely(!neigh)) { - neigh_add_path(skb, cb->hwaddr, dev); + neigh_add_path(skb, phdr->hwaddr, dev); return NETDEV_TX_OK; } break; case htons(ETH_P_ARP): case htons(ETH_P_RARP): /* for unicast ARP and RARP should always perform path find */ - unicast_arp_send(skb, dev, cb); + unicast_arp_send(skb, dev, phdr); return NETDEV_TX_OK; default: /* ethertype not supported by IPoIB */ @@ -1086,11 +1095,13 @@ send_using_neigh: goto unref; } } else if (neigh->ah) { - ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(cb->hwaddr)); + ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(phdr->hwaddr)); goto unref; } if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { + /* put pseudoheader back on for next time */ + skb_push(skb, sizeof(*phdr)); spin_lock_irqsave(&priv->lock, flags); __skb_queue_tail(&neigh->queue, skb); spin_unlock_irqrestore(&priv->lock, flags); @@ -1122,8 +1133,8 @@ static int ipoib_hard_header(struct sk_buff *skb, unsigned short type, const void *daddr, const void *saddr, unsigned len) { + struct ipoib_pseudo_header *phdr; struct ipoib_header *header; - struct ipoib_cb *cb = ipoib_skb_cb(skb); header = (struct ipoib_header *) skb_push(skb, sizeof *header); @@ -1132,12 +1143,13 @@ static int ipoib_hard_header(struct sk_buff *skb, /* * we don't rely on dst_entry structure, always stuff the - * destination address into skb->cb so we can figure out where + * destination address into skb hard header so we can figure out where * to send the packet later. */ - memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN); + phdr = (struct ipoib_pseudo_header *) skb_push(skb, sizeof(*phdr)); + memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN); - return sizeof *header; + return IPOIB_HARD_LEN; } static void ipoib_set_mcast_list(struct net_device *dev) @@ -1759,7 +1771,7 @@ void ipoib_setup(struct net_device *dev) dev->flags |= IFF_BROADCAST | IFF_MULTICAST; - dev->hard_header_len = IPOIB_ENCAP_LEN; + dev->hard_header_len = IPOIB_HARD_LEN; dev->addr_len = INFINIBAND_ALEN; dev->type = ARPHRD_INFINIBAND; dev->tx_queue_len = ipoib_sendq_size * 2; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index d3394b6..1909dd2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -796,9 +796,11 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) __ipoib_mcast_add(dev, mcast); list_add_tail(&mcast->list, &priv->multicast_list); } - if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) + if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) { + /* put pseudoheader back on for next time */ + skb_push(skb, sizeof(struct ipoib_pseudo_header)); skb_queue_tail(&mcast->pkt_queue, skb); - else { + } else { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); } -- cgit v0.10.2 From 9d6280da39c04832d6abf5f4ecc8175c9aad91c0 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 13 Oct 2016 18:50:02 +0200 Subject: IPv6: Drop the temporary address regen_timer The randomized interface identifier (rndid) was periodically updated from the regen_timer timer. Simplify the code by updating the rndid only when needed by ipv6_try_regen_rndid(). This makes the follow-up DESYNC_FACTOR fix much simpler. Also it fixes a reference counting error in this error path, where an in6_dev_put was missing: err = addrconf_sysctl_register(ndev); if (err) { ipv6_mc_destroy_dev(ndev); - del_timer(&ndev->regen_timer); snmp6_unregister_dev(ndev); goto err_release; Signed-off-by: Jiri Bohac Signed-off-by: David S. Miller diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 515352c..ae70735 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -191,7 +191,6 @@ struct inet6_dev { int dead; u8 rndid[8]; - struct timer_list regen_timer; struct list_head tempaddr_list; struct in6_addr token; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9faafe5..58c3d73 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -147,9 +147,8 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) } #endif -static void __ipv6_regen_rndid(struct inet6_dev *idev); -static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); -static void ipv6_regen_rndid(unsigned long data); +static void ipv6_regen_rndid(struct inet6_dev *idev); +static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(struct inet6_dev *idev); @@ -409,9 +408,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) goto err_release; } - /* One reference from device. We must do this before - * we invoke __ipv6_regen_rndid(). - */ + /* One reference from device. */ in6_dev_hold(ndev); if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) @@ -425,17 +422,14 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) #endif INIT_LIST_HEAD(&ndev->tempaddr_list); - setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || dev->type == ARPHRD_TUNNEL6 || dev->type == ARPHRD_SIT || dev->type == ARPHRD_NONE) { ndev->cnf.use_tempaddr = -1; - } else { - in6_dev_hold(ndev); - ipv6_regen_rndid((unsigned long) ndev); - } + } else + ipv6_regen_rndid(ndev); ndev->token = in6addr_any; @@ -447,7 +441,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) err = addrconf_sysctl_register(ndev); if (err) { ipv6_mc_destroy_dev(ndev); - del_timer(&ndev->regen_timer); snmp6_unregister_dev(ndev); goto err_release; } @@ -1222,7 +1215,7 @@ retry: } in6_ifa_hold(ifp); memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); - __ipv6_try_regen_rndid(idev, tmpaddr); + ipv6_try_regen_rndid(idev, tmpaddr); memcpy(&addr.s6_addr[8], idev->rndid, 8); age = (now - ifp->tstamp) / HZ; tmp_valid_lft = min_t(__u32, @@ -2150,7 +2143,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) } /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ -static void __ipv6_regen_rndid(struct inet6_dev *idev) +static void ipv6_regen_rndid(struct inet6_dev *idev) { regen: get_random_bytes(idev->rndid, sizeof(idev->rndid)); @@ -2179,43 +2172,10 @@ regen: } } -static void ipv6_regen_rndid(unsigned long data) -{ - struct inet6_dev *idev = (struct inet6_dev *) data; - unsigned long expires; - - rcu_read_lock_bh(); - write_lock_bh(&idev->lock); - - if (idev->dead) - goto out; - - __ipv6_regen_rndid(idev); - - expires = jiffies + - idev->cnf.temp_prefered_lft * HZ - - idev->cnf.regen_max_retry * idev->cnf.dad_transmits * - NEIGH_VAR(idev->nd_parms, RETRANS_TIME) - - idev->cnf.max_desync_factor * HZ; - if (time_before(expires, jiffies)) { - pr_warn("%s: too short regeneration interval; timer disabled for %s\n", - __func__, idev->dev->name); - goto out; - } - - if (!mod_timer(&idev->regen_timer, expires)) - in6_dev_hold(idev); - -out: - write_unlock_bh(&idev->lock); - rcu_read_unlock_bh(); - in6_dev_put(idev); -} - -static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) +static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) { if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) - __ipv6_regen_rndid(idev); + ipv6_regen_rndid(idev); } /* @@ -3594,9 +3554,6 @@ restart: if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); - if (how && del_timer(&idev->regen_timer)) - in6_dev_put(idev); - /* Step 3: clear tempaddr list */ while (!list_empty(&idev->tempaddr_list)) { ifa = list_first_entry(&idev->tempaddr_list, -- cgit v0.10.2 From 76506a986dc31394fd1f2741db037d29c7e57843 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 13 Oct 2016 18:52:15 +0200 Subject: IPv6: fix DESYNC_FACTOR The IPv6 temporary address generation uses a variable called DESYNC_FACTOR to prevent hosts updating the addresses at the same time. Quoting RFC 4941: ... The value DESYNC_FACTOR is a random value (different for each client) that ensures that clients don't synchronize with each other and generate new addresses at exactly the same time ... DESYNC_FACTOR is defined as: DESYNC_FACTOR -- A random value within the range 0 - MAX_DESYNC_FACTOR. It is computed once at system start (rather than each time it is used) and must never be greater than (TEMP_VALID_LIFETIME - REGEN_ADVANCE). First, I believe the RFC has a typo in it and meant to say: "and must never be greater than (TEMP_PREFERRED_LIFETIME - REGEN_ADVANCE)" The reason is that at various places in the RFC, DESYNC_FACTOR is used in a calculation like (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR) or (TEMP_PREFERRED_LIFETIME - REGEN_ADVANCE - DESYNC_FACTOR). It needs to be smaller than (TEMP_PREFERRED_LIFETIME - REGEN_ADVANCE) for the result of these calculations to be larger than zero. It's never used in a calculation together with TEMP_VALID_LIFETIME. I already submitted an errata to the rfc-editor: https://www.rfc-editor.org/errata_search.php?rfc=4941 The Linux implementation of DESYNC_FACTOR is very wrong: max_desync_factor is used in places DESYNC_FACTOR should be used. max_desync_factor is initialized to the RFC-recommended value for MAX_DESYNC_FACTOR (600) but the whole point is to get a _random_ value. And nothing ensures that the value used is not greater than (TEMP_PREFERRED_LIFETIME - REGEN_ADVANCE), which leads to underflows. The effect can easily be observed when setting the temp_prefered_lft sysctl e.g. to 60. The preferred lifetime of the temporary addresses will be bogus. TEMP_PREFERRED_LIFETIME and REGEN_ADVANCE are not constants and can be influenced by these three sysctls: regen_max_retry, dad_transmits and temp_prefered_lft. Thus, the upper bound for desync_factor needs to be re-calculated each time a new address is generated and if desync_factor is larger than the new upper bound, a new random value needs to be re-generated. And since we already have max_desync_factor configurable per interface, we also need to calculate and store desync_factor per interface. Signed-off-by: Jiri Bohac Signed-off-by: David S. Miller diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index ae70735..b0576cb 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -190,6 +190,7 @@ struct inet6_dev { __u32 if_flags; int dead; + u32 desync_factor; u8 rndid[8]; struct list_head tempaddr_list; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 58c3d73..cc7c26d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -422,6 +422,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) #endif INIT_LIST_HEAD(&ndev->tempaddr_list); + ndev->desync_factor = U32_MAX; if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || dev->type == ARPHRD_TUNNEL6 || @@ -1183,6 +1184,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i int ret = 0; u32 addr_flags; unsigned long now = jiffies; + long max_desync_factor; write_lock_bh(&idev->lock); if (ift) { @@ -1218,20 +1220,41 @@ retry: ipv6_try_regen_rndid(idev, tmpaddr); memcpy(&addr.s6_addr[8], idev->rndid, 8); age = (now - ifp->tstamp) / HZ; + + regen_advance = idev->cnf.regen_max_retry * + idev->cnf.dad_transmits * + NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ; + + /* recalculate max_desync_factor each time and update + * idev->desync_factor if it's larger + */ + max_desync_factor = min_t(__u32, + idev->cnf.max_desync_factor, + idev->cnf.temp_prefered_lft - regen_advance); + + if (unlikely(idev->desync_factor > max_desync_factor)) { + if (max_desync_factor > 0) { + get_random_bytes(&idev->desync_factor, + sizeof(idev->desync_factor)); + idev->desync_factor %= max_desync_factor; + } else { + idev->desync_factor = 0; + } + } + tmp_valid_lft = min_t(__u32, ifp->valid_lft, idev->cnf.temp_valid_lft + age); - tmp_prefered_lft = min_t(__u32, - ifp->prefered_lft, - idev->cnf.temp_prefered_lft + age - - idev->cnf.max_desync_factor); + tmp_prefered_lft = idev->cnf.temp_prefered_lft + age - + idev->desync_factor; + /* guard against underflow in case of concurrent updates to cnf */ + if (unlikely(tmp_prefered_lft < 0)) + tmp_prefered_lft = 0; + tmp_prefered_lft = min_t(__u32, ifp->prefered_lft, tmp_prefered_lft); tmp_plen = ifp->prefix_len; tmp_tstamp = ifp->tstamp; spin_unlock_bh(&ifp->lock); - regen_advance = idev->cnf.regen_max_retry * - idev->cnf.dad_transmits * - NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ; write_unlock_bh(&idev->lock); /* A temporary address is created only if this calculated Preferred @@ -2316,7 +2339,7 @@ static void manage_tempaddrs(struct inet6_dev *idev, max_valid = 0; max_prefered = idev->cnf.temp_prefered_lft - - idev->cnf.max_desync_factor - age; + idev->desync_factor - age; if (max_prefered < 0) max_prefered = 0; -- cgit v0.10.2 From ce6b04ee8b112cc9d5ef41ba697a3ffabc630f42 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 13 Oct 2016 22:57:01 +0300 Subject: qed: Fix static checker warning. Smatch compains about qed_roce_ll2_tx() dereference of the 'cdev' variable while testing its validity later. As the validation checking is an over-kill [variable would always be set], simply remove it. Reported-by: Dan Carpenter Fixes: abd49676c707 ("qed: Add RoCE ll2 & GSI support") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 76831a3..c73638c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -2809,11 +2809,6 @@ static int qed_roce_ll2_stop(struct qed_dev *cdev) struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2; int rc; - if (!cdev) { - DP_ERR(cdev, "qed roce ll2 stop: invalid cdev\n"); - return -EINVAL; - } - if (roce_ll2->handle == QED_LL2_UNUSED_HANDLE) { DP_ERR(cdev, "qed roce ll2 stop: cannot stop an unused LL2\n"); return -EINVAL; @@ -2850,7 +2845,7 @@ static int qed_roce_ll2_tx(struct qed_dev *cdev, int rc; int i; - if (!cdev || !pkt || !params) { + if (!pkt || !params) { DP_ERR(cdev, "roce ll2 tx: failed tx because one of the following is NULL - drv=%p, pkt=%p, params=%p\n", cdev, pkt, params); -- cgit v0.10.2 From 0189efb8f4f830b9ac7a7c56c0c6e260859e950d Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 13 Oct 2016 22:57:02 +0300 Subject: qed*: Fix Kconfig dependencies with INFINIBAND_QEDR The qedr driver would require a tristate Kconfig option [to allow it to compile as a module], and toward that end we've added the INFINIBAND_QEDR option. But as we've made the compilation of the qed/qede infrastructure required for RoCE dependent on the option we'd be facing linking difficulties in case that QED=y or QEDE=y, and INFINIBAND_QEDR=m. To resolve this, we seperate between the INFINIBAND_QEDR option and the infrastructure support in qed/qede by introducing a new QED_RDMA option which would be selected by INFINIBAND_QEDR but would be a boolean instead of a tristate; Following that, the qed/qede is fixed based on this new option so that all config combinations would be supported. Fixes: cee9fbd8e2e9 ("qede: add qedr framework") Reported-by: Arnd Bergmann Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index 0df1391f9..7756772 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -107,10 +107,14 @@ config QEDE ---help--- This enables the support for ... +config QED_RDMA + bool + config INFINIBAND_QEDR tristate "QLogic qede RoCE sources [debug]" depends on QEDE && 64BIT select QED_LL2 + select QED_RDMA default n ---help--- This provides a temporary node that allows the compilation diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index cda0af7..967acf3 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -5,4 +5,4 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ qed_selftest.o qed_dcbx.o qed_debug.o qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o qed-$(CONFIG_QED_LL2) += qed_ll2.o -qed-$(CONFIG_INFINIBAND_QEDR) += qed_roce.o +qed-$(CONFIG_QED_RDMA) += qed_roce.o diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 82370a1..277db78 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -47,13 +47,8 @@ #define TM_ALIGN BIT(TM_SHIFT) #define TM_ELEM_SIZE 4 -/* ILT constants */ -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) /* For RoCE we configure to 64K to cover for RoCE max tasks 256K purpose. */ -#define ILT_DEFAULT_HW_P_SIZE 4 -#else -#define ILT_DEFAULT_HW_P_SIZE 3 -#endif +#define ILT_DEFAULT_HW_P_SIZE (IS_ENABLED(CONFIG_QED_RDMA) ? 4 : 3) #define ILT_PAGE_IN_BYTES(hw_p_size) (1U << ((hw_p_size) + 12)) #define ILT_CFG_REG(cli, reg) PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 754f6a9..21adf52 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1422,19 +1422,19 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn) u32 *feat_num = p_hwfn->hw_info.feat_num; int num_features = 1; -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) - /* Roce CNQ each requires: 1 status block + 1 CNQ. We divide the - * status blocks equally between L2 / RoCE but with consideration as - * to how many l2 queues / cnqs we have - */ - if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) { + if (IS_ENABLED(CONFIG_QED_RDMA) && + p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) { + /* Roce CNQ each requires: 1 status block + 1 CNQ. We divide + * the status blocks equally between L2 / RoCE but with + * consideration as to how many l2 queues / cnqs we have. + */ num_features++; feat_num[QED_RDMA_CNQ] = min_t(u32, RESC_NUM(p_hwfn, QED_SB) / num_features, RESC_NUM(p_hwfn, QED_RDMA_CNQ_RAM)); } -#endif + feat_num[QED_PF_L2_QUE] = min_t(u32, RESC_NUM(p_hwfn, QED_SB) / num_features, RESC_NUM(p_hwfn, QED_L2_QUEUE)); diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 02a8be2..7856e52 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -38,6 +38,7 @@ #include "qed_mcp.h" #include "qed_reg_addr.h" #include "qed_sp.h" +#include "qed_roce.h" #define QED_LL2_RX_REGISTERED(ll2) ((ll2)->rx_queue.b_cb_registred) #define QED_LL2_TX_REGISTERED(ll2) ((ll2)->tx_queue.b_cb_registred) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h index 80a5dc2..4e3d62a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -293,24 +293,4 @@ void qed_ll2_setup(struct qed_hwfn *p_hwfn, */ void qed_ll2_free(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_connections); -void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn, - u8 connection_handle, - void *cookie, - dma_addr_t rx_buf_addr, - u16 data_length, - u8 data_length_error, - u16 parse_flags, - u16 vlan, - u32 src_mac_addr_hi, - u16 src_mac_addr_lo, bool b_last_packet); -void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn, - u8 connection_handle, - void *cookie, - dma_addr_t first_frag_addr, - bool b_last_fragment, bool b_last_packet); -void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn, - u8 connection_handle, - void *cookie, - dma_addr_t first_frag_addr, - bool b_last_fragment, bool b_last_packet); #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 4ee3151..6eb2401 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -33,10 +33,8 @@ #include "qed_hw.h" #include "qed_selftest.h" -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) #define QED_ROCE_QPS (8192) #define QED_ROCE_DPIS (8) -#endif static char version[] = "QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n"; @@ -682,9 +680,7 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, enum qed_int_mode int_mode) { struct qed_sb_cnt_info sb_cnt_info; -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) - int num_l2_queues; -#endif + int num_l2_queues = 0; int rc; int i; @@ -715,8 +711,9 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, cdev->int_params.fp_msix_cnt = cdev->int_params.out.num_vectors - cdev->num_hwfns; -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) - num_l2_queues = 0; + if (!IS_ENABLED(CONFIG_QED_RDMA)) + return 0; + for_each_hwfn(cdev, i) num_l2_queues += FEAT_NUM(&cdev->hwfns[i], QED_PF_L2_QUE); @@ -738,7 +735,6 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, DP_VERBOSE(cdev, QED_MSG_RDMA, "roce_msix_cnt=%d roce_msix_base=%d\n", cdev->int_params.rdma_msix_cnt, cdev->int_params.rdma_msix_base); -#endif return 0; } @@ -843,18 +839,20 @@ static void qed_update_pf_params(struct qed_dev *cdev, { int i; -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) - params->rdma_pf_params.num_qps = QED_ROCE_QPS; - params->rdma_pf_params.min_dpis = QED_ROCE_DPIS; - /* divide by 3 the MRs to avoid MF ILT overflow */ - params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS; - params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX; -#endif for (i = 0; i < cdev->num_hwfns; i++) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; p_hwfn->pf_params = *params; } + + if (!IS_ENABLED(CONFIG_QED_RDMA)) + return; + + params->rdma_pf_params.num_qps = QED_ROCE_QPS; + params->rdma_pf_params.min_dpis = QED_ROCE_DPIS; + /* divide by 3 the MRs to avoid MF ILT overflow */ + params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS; + params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX; } static int qed_slowpath_start(struct qed_dev *cdev, diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index c73638c..187df38 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -129,17 +129,12 @@ static void qed_bmap_release_id(struct qed_hwfn *p_hwfn, } } -u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id) +static u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id) { /* First sb id for RoCE is after all the l2 sb */ return FEAT_NUM((struct qed_hwfn *)p_hwfn, QED_PF_L2_QUE) + rel_sb_id; } -u32 qed_rdma_query_cau_timer_res(void *rdma_cxt) -{ - return QED_CAU_DEF_RX_TIMER_RES; -} - static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_rdma_start_in_params *params) @@ -275,7 +270,7 @@ free_rdma_info: return rc; } -void qed_rdma_resc_free(struct qed_hwfn *p_hwfn) +static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn) { struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; @@ -527,6 +522,26 @@ static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn, return qed_spq_post(p_hwfn, p_ent, NULL); } +static int qed_rdma_alloc_tid(void *rdma_cxt, u32 *itid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID\n"); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->tid_map, itid); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + if (rc) + goto out; + + rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_TASK, *itid); +out: + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID - done, rc = %d\n", rc); + return rc; +} + static int qed_rdma_reserve_lkey(struct qed_hwfn *p_hwfn) { struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev; @@ -573,7 +588,7 @@ static int qed_rdma_setup(struct qed_hwfn *p_hwfn, return qed_rdma_start_fw(p_hwfn, params, p_ptt); } -int qed_rdma_stop(void *rdma_cxt) +static int qed_rdma_stop(void *rdma_cxt) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct rdma_close_func_ramrod_data *p_ramrod; @@ -629,8 +644,8 @@ out: return rc; } -int qed_rdma_add_user(void *rdma_cxt, - struct qed_rdma_add_user_out_params *out_params) +static int qed_rdma_add_user(void *rdma_cxt, + struct qed_rdma_add_user_out_params *out_params) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; u32 dpi_start_offset; @@ -664,7 +679,7 @@ int qed_rdma_add_user(void *rdma_cxt, return rc; } -struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt) +static struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct qed_rdma_port *p_port = p_hwfn->p_rdma_info->port; @@ -680,7 +695,7 @@ struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt) return p_port; } -struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt) +static struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; @@ -690,7 +705,7 @@ struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt) return p_hwfn->p_rdma_info->dev; } -void qed_rdma_free_tid(void *rdma_cxt, u32 itid) +static void qed_rdma_free_tid(void *rdma_cxt, u32 itid) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; @@ -701,27 +716,7 @@ void qed_rdma_free_tid(void *rdma_cxt, u32 itid) spin_unlock_bh(&p_hwfn->p_rdma_info->lock); } -int qed_rdma_alloc_tid(void *rdma_cxt, u32 *itid) -{ - struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; - int rc; - - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID\n"); - - spin_lock_bh(&p_hwfn->p_rdma_info->lock); - rc = qed_rdma_bmap_alloc_id(p_hwfn, - &p_hwfn->p_rdma_info->tid_map, itid); - spin_unlock_bh(&p_hwfn->p_rdma_info->lock); - if (rc) - goto out; - - rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_TASK, *itid); -out: - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID - done, rc = %d\n", rc); - return rc; -} - -void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod) +static void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod) { struct qed_hwfn *p_hwfn; u16 qz_num; @@ -816,7 +811,7 @@ static int qed_rdma_get_int(struct qed_dev *cdev, struct qed_int_info *info) return 0; } -int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd) +static int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; u32 returned_id; @@ -1985,9 +1980,9 @@ int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) return 0; } -int qed_rdma_query_qp(void *rdma_cxt, - struct qed_rdma_qp *qp, - struct qed_rdma_query_qp_out_params *out_params) +static int qed_rdma_query_qp(void *rdma_cxt, + struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *out_params) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; int rc; @@ -2022,7 +2017,7 @@ int qed_rdma_query_qp(void *rdma_cxt, return rc; } -int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp) +static int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; int rc = 0; @@ -2215,9 +2210,9 @@ static int qed_roce_modify_qp(struct qed_hwfn *p_hwfn, return rc; } -int qed_rdma_modify_qp(void *rdma_cxt, - struct qed_rdma_qp *qp, - struct qed_rdma_modify_qp_in_params *params) +static int qed_rdma_modify_qp(void *rdma_cxt, + struct qed_rdma_qp *qp, + struct qed_rdma_modify_qp_in_params *params) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; enum qed_roce_qp_state prev_state; @@ -2312,8 +2307,9 @@ int qed_rdma_modify_qp(void *rdma_cxt, return rc; } -int qed_rdma_register_tid(void *rdma_cxt, - struct qed_rdma_register_tid_in_params *params) +static int +qed_rdma_register_tid(void *rdma_cxt, + struct qed_rdma_register_tid_in_params *params) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct rdma_register_tid_ramrod_data *p_ramrod; @@ -2450,7 +2446,7 @@ int qed_rdma_register_tid(void *rdma_cxt, return rc; } -int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid) +static int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct rdma_deregister_tid_ramrod_data *p_ramrod; @@ -2561,7 +2557,8 @@ void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) qed_rdma_dpm_conf(p_hwfn, p_ptt); } -int qed_rdma_start(void *rdma_cxt, struct qed_rdma_start_in_params *params) +static int qed_rdma_start(void *rdma_cxt, + struct qed_rdma_start_in_params *params) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct qed_ptt *p_ptt; @@ -2601,7 +2598,7 @@ static int qed_rdma_init(struct qed_dev *cdev, return qed_rdma_start(QED_LEADING_HWFN(cdev), params); } -void qed_rdma_remove_user(void *rdma_cxt, u16 dpi) +static void qed_rdma_remove_user(void *rdma_cxt, u16 dpi) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h index 2f091e8..6914131 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.h +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h @@ -181,36 +181,55 @@ struct qed_rdma_qp { dma_addr_t shared_queue_phys_addr; }; -int -qed_rdma_add_user(void *rdma_cxt, - struct qed_rdma_add_user_out_params *out_params); -int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd); -int qed_rdma_alloc_tid(void *rdma_cxt, u32 *tid); -int qed_rdma_deregister_tid(void *rdma_cxt, u32 tid); -void qed_rdma_free_tid(void *rdma_cxt, u32 tid); -struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt); -struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt); -int -qed_rdma_register_tid(void *rdma_cxt, - struct qed_rdma_register_tid_in_params *params); -void qed_rdma_remove_user(void *rdma_cxt, u16 dpi); -int qed_rdma_start(void *p_hwfn, struct qed_rdma_start_in_params *params); -int qed_rdma_stop(void *rdma_cxt); -u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id); -u32 qed_rdma_query_cau_timer_res(void *p_hwfn); -void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 cnq_index, u16 prod); -void qed_rdma_resc_free(struct qed_hwfn *p_hwfn); +#if IS_ENABLED(CONFIG_QED_RDMA) +void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); void qed_async_roce_event(struct qed_hwfn *p_hwfn, struct event_ring_entry *p_eqe); -int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp); -int qed_rdma_modify_qp(void *rdma_cxt, struct qed_rdma_qp *qp, - struct qed_rdma_modify_qp_in_params *params); -int qed_rdma_query_qp(void *rdma_cxt, struct qed_rdma_qp *qp, - struct qed_rdma_query_qp_out_params *out_params); - -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) -void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet); +void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet); +void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t rx_buf_addr, + u16 data_length, + u8 data_length_error, + u16 parse_flags, + u16 vlan, + u32 src_mac_addr_hi, + u16 src_mac_addr_lo, bool b_last_packet); #else -void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {} +static inline void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {} +static inline void qed_async_roce_event(struct qed_hwfn *p_hwfn, struct event_ring_entry *p_eqe) {} +static inline void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, + bool b_last_packet) {} +static inline void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, + bool b_last_packet) {} +static inline void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t rx_buf_addr, + u16 data_length, + u8 data_length_error, + u16 parse_flags, + u16 vlan, + u32 src_mac_addr_hi, + u16 src_mac_addr_lo, + bool b_last_packet) {} #endif #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index caff415..9fbaf94 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -28,9 +28,7 @@ #include "qed_reg_addr.h" #include "qed_sp.h" #include "qed_sriov.h" -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) #include "qed_roce.h" -#endif /*************************************************************************** * Structures & Definitions @@ -240,11 +238,9 @@ qed_async_event_completion(struct qed_hwfn *p_hwfn, struct event_ring_entry *p_eqe) { switch (p_eqe->protocol_id) { -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) case PROTOCOLID_ROCE: qed_async_roce_event(p_hwfn, p_eqe); return 0; -#endif case PROTOCOLID_COMMON: return qed_sriov_eqe_event(p_hwfn, p_eqe->opcode, diff --git a/drivers/net/ethernet/qlogic/qede/Makefile b/drivers/net/ethernet/qlogic/qede/Makefile index 28dc589..048a230 100644 --- a/drivers/net/ethernet/qlogic/qede/Makefile +++ b/drivers/net/ethernet/qlogic/qede/Makefile @@ -2,4 +2,4 @@ obj-$(CONFIG_QEDE) := qede.o qede-y := qede_main.o qede_ethtool.o qede-$(CONFIG_DCB) += qede_dcbnl.o -qede-$(CONFIG_INFINIBAND_QEDR) += qede_roce.o +qede-$(CONFIG_QED_RDMA) += qede_roce.o diff --git a/include/linux/qed/qede_roce.h b/include/linux/qed/qede_roce.h index 99fbe6d..f48d64b 100644 --- a/include/linux/qed/qede_roce.h +++ b/include/linux/qed/qede_roce.h @@ -68,7 +68,7 @@ void qede_roce_unregister_driver(struct qedr_driver *drv); bool qede_roce_supported(struct qede_dev *dev); -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +#if IS_ENABLED(CONFIG_QED_RDMA) int qede_roce_dev_add(struct qede_dev *dev); void qede_roce_dev_event_open(struct qede_dev *dev); void qede_roce_dev_event_close(struct qede_dev *dev); -- cgit v0.10.2 From 8c93beaf5714b9ddfa4a0b4bcf89725d2021e903 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 13 Oct 2016 22:57:03 +0300 Subject: qed: Additional work toward cleaning C=1 This cleans many of the warnings that would arise in qed as a result of compilations with C=1; Most of those are the addition of missing 'static' to functions, although there are several other fixes as well. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 277db78..0c42c24 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -344,14 +344,14 @@ static struct qed_tid_seg *qed_cxt_tid_seg_info(struct qed_hwfn *p_hwfn, return NULL; } -void qed_cxt_set_srq_count(struct qed_hwfn *p_hwfn, u32 num_srqs) +static void qed_cxt_set_srq_count(struct qed_hwfn *p_hwfn, u32 num_srqs) { struct qed_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr; p_mgr->srq_count = num_srqs; } -u32 qed_cxt_get_srq_count(struct qed_hwfn *p_hwfn) +static u32 qed_cxt_get_srq_count(struct qed_hwfn *p_hwfn) { struct qed_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr; @@ -1799,8 +1799,8 @@ int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn, struct qed_cxt_info *p_info) return 0; } -void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn, - struct qed_rdma_pf_params *p_params) +static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn, + struct qed_rdma_pf_params *p_params) { u32 num_cons, num_tasks, num_qps, num_mrs, num_srqs; enum protocol_type proto; diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index 88e7d5b..68f19ca 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -405,7 +405,7 @@ struct phy_defs { /***************************** Constant Arrays *******************************/ /* Debug arrays */ -static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {0} }; +static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {NULL} }; /* Chip constant definitions array */ static struct chip_defs s_chip_defs[MAX_CHIP_IDS] = { @@ -4028,10 +4028,10 @@ static enum dbg_status qed_mcp_trace_read_meta(struct qed_hwfn *p_hwfn, } /* Dump MCP Trace */ -enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 *dump_buf, - bool dump, u32 *num_dumped_dwords) +static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) { u32 trace_data_grc_addr, trace_data_size_bytes, trace_data_size_dwords; u32 trace_meta_size_dwords, running_bundle_id, offset = 0; @@ -4130,10 +4130,10 @@ enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn, } /* Dump GRC FIFO */ -enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 *dump_buf, - bool dump, u32 *num_dumped_dwords) +static enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) { u32 offset = 0, dwords_read, size_param_offset; bool fifo_has_data; @@ -4192,10 +4192,10 @@ enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn, } /* Dump IGU FIFO */ -enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 *dump_buf, - bool dump, u32 *num_dumped_dwords) +static enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) { u32 offset = 0, dwords_read, size_param_offset; bool fifo_has_data; @@ -4255,10 +4255,11 @@ enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn, } /* Protection Override dump */ -enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 *dump_buf, - bool dump, u32 *num_dumped_dwords) +static enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + u32 *num_dumped_dwords) { u32 offset = 0, size_param_offset, override_window_dwords; @@ -6339,10 +6340,11 @@ enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn, } /* Wrapper for unifying the idle_chk and mcp_trace api */ -enum dbg_status qed_print_idle_chk_results_wrapper(struct qed_hwfn *p_hwfn, - u32 *dump_buf, - u32 num_dumped_dwords, - char *results_buf) +static enum dbg_status +qed_print_idle_chk_results_wrapper(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) { u32 num_errors, num_warnnings; @@ -6413,8 +6415,8 @@ static void qed_dbg_print_feature(u8 *p_text_buf, u32 text_size) #define QED_RESULTS_BUF_MIN_SIZE 16 /* Generic function for decoding debug feature info */ -enum dbg_status format_feature(struct qed_hwfn *p_hwfn, - enum qed_dbg_features feature_idx) +static enum dbg_status format_feature(struct qed_hwfn *p_hwfn, + enum qed_dbg_features feature_idx) { struct qed_dbg_feature *feature = &p_hwfn->cdev->dbg_params.features[feature_idx]; @@ -6480,8 +6482,9 @@ enum dbg_status format_feature(struct qed_hwfn *p_hwfn, } /* Generic function for performing the dump of a debug feature. */ -enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - enum qed_dbg_features feature_idx) +static enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_dbg_features feature_idx) { struct qed_dbg_feature *feature = &p_hwfn->cdev->dbg_params.features[feature_idx]; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 21adf52..edae5fc 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -497,12 +497,13 @@ int qed_resc_alloc(struct qed_dev *cdev) if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) { num_cons = qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_ROCE, - 0) * 2; + NULL) * 2; n_eqes += num_cons + 2 * MAX_NUM_VFS_BB; } else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) { num_cons = qed_cxt_get_proto_cid_count(p_hwfn, - PROTOCOLID_ISCSI, 0); + PROTOCOLID_ISCSI, + NULL); n_eqes += 2 * num_cons; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 7856e52..ce171a8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -141,11 +141,11 @@ static void qed_ll2_kill_buffers(struct qed_dev *cdev) qed_ll2_dealloc_buffer(cdev, buffer); } -void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn, - u8 connection_handle, - struct qed_ll2_rx_packet *p_pkt, - struct core_rx_fast_path_cqe *p_cqe, - bool b_last_packet) +static void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + struct qed_ll2_rx_packet *p_pkt, + struct core_rx_fast_path_cqe *p_cqe, + bool b_last_packet) { u16 packet_length = le16_to_cpu(p_cqe->packet_length); struct qed_ll2_buffer *buffer = p_pkt->cookie; @@ -516,7 +516,7 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) return rc; } -void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) +static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) { struct qed_ll2_info *p_ll2_conn = NULL; struct qed_ll2_rx_packet *p_pkt = NULL; @@ -1124,9 +1124,6 @@ static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, DMA_REGPAIR_LE(start_bd->addr, first_frag); start_bd->nbytes = cpu_to_le16(first_frag_len); - SET_FIELD(start_bd->bd_flags.as_bitfield, CORE_TX_BD_FLAGS_ROCE_FLAV, - type); - DP_VERBOSE(p_hwfn, (NETIF_MSG_TX_QUEUED | QED_MSG_LL2), "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Tx Producer at [0x%04x] - set with a %04x bytes %02x BDs buffer at %08x:%08x\n", diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 6eb2401..8dc3f46 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1430,7 +1430,7 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode) return status; } -struct qed_selftest_ops qed_selftest_ops_pass = { +static struct qed_selftest_ops qed_selftest_ops_pass = { .selftest_memory = &qed_selftest_memory, .selftest_interrupt = &qed_selftest_interrupt, .selftest_register = &qed_selftest_register, diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 187df38..f3a825a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -157,7 +157,8 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, p_hwfn->p_rdma_info = p_rdma_info; p_rdma_info->proto = PROTOCOLID_ROCE; - num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, 0); + num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, + NULL); p_rdma_info->num_qps = num_cons / 2; @@ -831,7 +832,7 @@ static int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd) return rc; } -void qed_rdma_free_pd(void *rdma_cxt, u16 pd) +static void qed_rdma_free_pd(void *rdma_cxt, u16 pd) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; @@ -868,8 +869,9 @@ qed_rdma_toggle_bit_create_resize_cq(struct qed_hwfn *p_hwfn, u16 icid) return toggle_bit; } -int qed_rdma_create_cq(void *rdma_cxt, - struct qed_rdma_create_cq_in_params *params, u16 *icid) +static int qed_rdma_create_cq(void *rdma_cxt, + struct qed_rdma_create_cq_in_params *params, + u16 *icid) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct qed_rdma_info *p_info = p_hwfn->p_rdma_info; @@ -952,98 +954,10 @@ err: return rc; } -int qed_rdma_resize_cq(void *rdma_cxt, - struct qed_rdma_resize_cq_in_params *in_params, - struct qed_rdma_resize_cq_out_params *out_params) -{ - struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; - struct rdma_resize_cq_output_params *p_ramrod_res; - struct rdma_resize_cq_ramrod_data *p_ramrod; - enum qed_rdma_toggle_bit toggle_bit; - struct qed_sp_init_data init_data; - struct qed_spq_entry *p_ent; - dma_addr_t ramrod_res_phys; - u8 fw_return_code; - int rc = -ENOMEM; - - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", in_params->icid); - - p_ramrod_res = - (struct rdma_resize_cq_output_params *) - dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, - sizeof(struct rdma_resize_cq_output_params), - &ramrod_res_phys, GFP_KERNEL); - if (!p_ramrod_res) { - DP_NOTICE(p_hwfn, - "qed resize cq failed: cannot allocate memory (ramrod)\n"); - return rc; - } - - /* Get SPQ entry */ - memset(&init_data, 0, sizeof(init_data)); - init_data.cid = in_params->icid; - init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; - init_data.comp_mode = QED_SPQ_MODE_EBLOCK; - - rc = qed_sp_init_request(p_hwfn, &p_ent, - RDMA_RAMROD_RESIZE_CQ, - p_hwfn->p_rdma_info->proto, &init_data); - if (rc) - goto err; - - p_ramrod = &p_ent->ramrod.rdma_resize_cq; - - p_ramrod->flags = 0; - - /* toggle the bit for every resize or create cq for a given icid */ - toggle_bit = qed_rdma_toggle_bit_create_resize_cq(p_hwfn, - in_params->icid); - - SET_FIELD(p_ramrod->flags, - RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT, toggle_bit); - - SET_FIELD(p_ramrod->flags, - RDMA_RESIZE_CQ_RAMROD_DATA_IS_TWO_LEVEL_PBL, - in_params->pbl_two_level); - - p_ramrod->pbl_log_page_size = in_params->pbl_page_size_log - 12; - p_ramrod->pbl_num_pages = cpu_to_le16(in_params->pbl_num_pages); - p_ramrod->max_cqes = cpu_to_le32(in_params->cq_size); - DMA_REGPAIR_LE(p_ramrod->pbl_addr, in_params->pbl_ptr); - DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); - - rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code); - if (rc) - goto err; - - if (fw_return_code != RDMA_RETURN_OK) { - DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code); - rc = -EINVAL; - goto err; - } - - out_params->prod = le32_to_cpu(p_ramrod_res->old_cq_prod); - out_params->cons = le32_to_cpu(p_ramrod_res->old_cq_cons); - - dma_free_coherent(&p_hwfn->cdev->pdev->dev, - sizeof(struct rdma_resize_cq_output_params), - p_ramrod_res, ramrod_res_phys); - - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Resized CQ, rc = %d\n", rc); - - return rc; - -err: dma_free_coherent(&p_hwfn->cdev->pdev->dev, - sizeof(struct rdma_resize_cq_output_params), - p_ramrod_res, ramrod_res_phys); - DP_NOTICE(p_hwfn, "Resized CQ, Failed - rc = %d\n", rc); - - return rc; -} - -int qed_rdma_destroy_cq(void *rdma_cxt, - struct qed_rdma_destroy_cq_in_params *in_params, - struct qed_rdma_destroy_cq_out_params *out_params) +static int +qed_rdma_destroy_cq(void *rdma_cxt, + struct qed_rdma_destroy_cq_in_params *in_params, + struct qed_rdma_destroy_cq_out_params *out_params) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct rdma_destroy_cq_output_params *p_ramrod_res; @@ -1164,7 +1078,7 @@ static enum roce_flavor qed_roce_mode_to_flavor(enum roce_mode roce_mode) return flavor; } -int qed_roce_alloc_cid(struct qed_hwfn *p_hwfn, u16 *cid) +static int qed_roce_alloc_cid(struct qed_hwfn *p_hwfn, u16 *cid) { struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; u32 responder_icid; @@ -1788,9 +1702,9 @@ err: return rc; } -int qed_roce_query_qp(struct qed_hwfn *p_hwfn, - struct qed_rdma_qp *qp, - struct qed_rdma_query_qp_out_params *out_params) +static int qed_roce_query_qp(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *out_params) { struct roce_query_qp_resp_output_params *p_resp_ramrod_res; struct roce_query_qp_req_output_params *p_req_ramrod_res; @@ -1931,7 +1845,7 @@ err_resp: return rc; } -int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) +static int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) { u32 num_invalidated_mw = 0; u32 num_bound_mw = 0; @@ -2033,7 +1947,7 @@ static int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp) return rc; } -struct qed_rdma_qp * +static struct qed_rdma_qp * qed_rdma_create_qp(void *rdma_cxt, struct qed_rdma_create_qp_in_params *in_params, struct qed_rdma_create_qp_out_params *out_params) diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h index 6914131..279f342 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.h +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h @@ -95,26 +95,6 @@ struct qed_rdma_info { enum protocol_type proto; }; -struct qed_rdma_resize_cq_in_params { - u16 icid; - u32 cq_size; - bool pbl_two_level; - u64 pbl_ptr; - u16 pbl_num_pages; - u8 pbl_page_size_log; -}; - -struct qed_rdma_resize_cq_out_params { - u32 prod; - u32 cons; -}; - -struct qed_rdma_resize_cnq_in_params { - u32 cnq_id; - u32 pbl_page_size_log; - u64 pbl_ptr; -}; - struct qed_rdma_qp { struct regpair qp_handle; struct regpair qp_handle_async; diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index 652c908..b2c08e4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -80,7 +80,6 @@ union ramrod_data { struct roce_destroy_qp_resp_ramrod_data roce_destroy_qp_resp; struct roce_destroy_qp_req_ramrod_data roce_destroy_qp_req; struct rdma_create_cq_ramrod_data rdma_create_cq; - struct rdma_resize_cq_ramrod_data rdma_resize_cq; struct rdma_destroy_cq_ramrod_data rdma_destroy_cq; struct rdma_srq_create_ramrod_data rdma_create_srq; struct rdma_srq_destroy_ramrod_data rdma_destroy_srq; -- cgit v0.10.2 From 958b3d396d7f80755e2c2e6a8f873a669f38de10 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Thu, 13 Oct 2016 13:13:11 -0700 Subject: net/mlx4_en: fixup xdp tx irq to match rx In cases where the number of tx rings is not a multiple of the number of rx rings, the tx completion event will be handled on a different core from the transmit and population of the ring. Races on the ring will lead to a double-free of the page, and possibly other corruption. The rings are initialized by default with a valid multiple of rings, based on the number of cpus, therefore an invalid configuration requires ethtool to change the ring layout. For instance 'ethtool -L eth0 rx 9 tx 8' will cause packets received on rx0, and XDP_TX'd to tx48, to be completed on cpu3 (48 % 9 == 3). Resolve this discrepancy by shifting the irq for the xdp tx queues to start again from 0, modulo rx_ring_num. Fixes: 9ecc2d86171a ("net/mlx4_en: add xdp forwarding and data write support") Reported-by: Jesper Dangaard Brouer Signed-off-by: Brenden Blanco Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 132cea6..e3be7e4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -127,7 +127,15 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, /* For TX we use the same irq per ring we assigned for the RX */ struct mlx4_en_cq *rx_cq; - + int xdp_index; + + /* The xdp tx irq must align with the rx ring that forwards to + * it, so reindex these from 0. This should only happen when + * tx_ring_num is not a multiple of rx_ring_num. + */ + xdp_index = (priv->xdp_ring_num - priv->tx_ring_num) + cq_idx; + if (xdp_index >= 0) + cq_idx = xdp_index; cq_idx = cq_idx % priv->rx_ring_num; rx_cq = priv->rx_cq[cq_idx]; cq->vector = rx_cq->vector; -- cgit v0.10.2 From 85a624403c77c3f074931aefdfced59f61b668cb Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 13 Oct 2016 16:13:55 -0700 Subject: ethtool: silence warning on bit loss Sparse was complaining when we went to prototype some code using ethtool_cmd_speed_set and SPEED_100000, which uses the upper 16 bits of __u32 speed for the first time. CHECK ... .../uapi/linux/ethtool.h:123:28: warning: cast truncates bits from constant value (186a0 becomes 86a0) The warning is actually bogus, as no bits are really lost, but we can get rid of the sparse warning with this one small change. Reported-by: Preethi Banala Signed-off-by: Jesse Brandeburg Signed-off-by: David S. Miller diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 099a420..8e54723 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -119,8 +119,7 @@ struct ethtool_cmd { static inline void ethtool_cmd_speed_set(struct ethtool_cmd *ep, __u32 speed) { - - ep->speed = (__u16)speed; + ep->speed = (__u16)(speed & 0xFFFF); ep->speed_hi = (__u16)(speed >> 16); } -- cgit v0.10.2 From 610df1d2342d639ec87a8fb5290ca4799d91d59b Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 13 Oct 2016 16:43:16 -0700 Subject: net: asix: Avoid looping when the device does not respond Check answers from USB stack and avoid re-sending the request multiple times if the device does not respond. This fixes the following problem, observed with a probably flaky adapter. [62108.732707] usb 1-3: new high-speed USB device number 5 using xhci_hcd [62108.914421] usb 1-3: New USB device found, idVendor=0b95, idProduct=7720 [62108.914463] usb 1-3: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [62108.914476] usb 1-3: Product: AX88x72A [62108.914486] usb 1-3: Manufacturer: ASIX Elec. Corp. [62108.914495] usb 1-3: SerialNumber: 000001 [62114.109109] asix 1-3:1.0 (unnamed net_device) (uninitialized): Failed to write reg index 0x0000: -110 [62114.109139] asix 1-3:1.0 (unnamed net_device) (uninitialized): Failed to send software reset: ffffff92 [62119.109048] asix 1-3:1.0 (unnamed net_device) (uninitialized): Failed to write reg index 0x0000: -110 ... Since the USB timeout is 5 seconds, and the operation is retried 30 times, this results in [62278.180353] INFO: task mtpd:1725 blocked for more than 120 seconds. [62278.180373] Tainted: G W 3.18.0-13298-g94ace9e #1 [62278.180383] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. ... [62278.180957] kworker/2:0 D 0000000000000000 0 5744 2 0x00000000 [62278.180978] Workqueue: usb_hub_wq hub_event [62278.181029] ffff880177f833b8 0000000000000046 ffff88017fd00000 ffff88017b126d80 [62278.181048] ffff880177f83fd8 ffff880065a71b60 0000000000013340 ffff880065a71b60 [62278.181065] 0000000000000286 0000000103b1c199 0000000000001388 0000000000000002 [62278.181081] Call Trace: [62278.181092] [] ? console_conditional_schedule+0x2c/0x2c [62278.181105] [] schedule+0x69/0x6b [62278.181117] [] schedule_timeout+0xe3/0x11d [62278.181133] [] ? trace_timer_start+0x51/0x51 [62278.181146] [] do_wait_for_common+0x12f/0x16c [62278.181162] [] ? wake_up_process+0x39/0x39 [62278.181174] [] wait_for_common+0x52/0x6d [62278.181187] [] wait_for_completion_timeout+0x13/0x15 [62278.181201] [] usb_start_wait_urb+0x93/0xf1 [62278.181214] [] usb_control_msg+0xe1/0x11d [62278.181230] [] usbnet_write_cmd+0x9c/0xc6 [usbnet] [62278.181286] [] asix_write_cmd+0x4e/0x7e [asix] [62278.181300] [] asix_set_sw_mii+0x25/0x4e [asix] [62278.181314] [] asix_mdio_read+0x51/0x109 [asix] ... Signed-off-by: Guenter Roeck Signed-off-by: David S. Miller diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index f79eb12..125cff5 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -433,13 +433,13 @@ int asix_mdio_read(struct net_device *netdev, int phy_id, int loc) mutex_lock(&dev->phy_mutex); do { ret = asix_set_sw_mii(dev, 0); - if (ret == -ENODEV) + if (ret == -ENODEV || ret == -ETIMEDOUT) break; usleep_range(1000, 1100); ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &smsr, 0); } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); - if (ret == -ENODEV) { + if (ret == -ENODEV || ret == -ETIMEDOUT) { mutex_unlock(&dev->phy_mutex); return ret; } @@ -497,13 +497,13 @@ int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc) mutex_lock(&dev->phy_mutex); do { ret = asix_set_sw_mii(dev, 1); - if (ret == -ENODEV) + if (ret == -ENODEV || ret == -ETIMEDOUT) break; usleep_range(1000, 1100); ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &smsr, 1); } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); - if (ret == -ENODEV) { + if (ret == -ENODEV || ret == -ETIMEDOUT) { mutex_unlock(&dev->phy_mutex); return ret; } -- cgit v0.10.2 From f0076436136751359e0886f3302a2a0b3a28ba6e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 14 Oct 2016 14:40:33 +0100 Subject: r8169: set coherent DMA mask as well as streaming DMA mask PCI devices that are 64-bit DMA capable should set the coherent DMA mask as well as the streaming DMA mask. On some architectures, these are managed separately, and so the coherent DMA mask will be left at its default value of 32 if it is not set explicitly. This results in errors such as r8169 Gigabit Ethernet driver 2.3LK-NAPI loaded hwdev DMA mask = 0x00000000ffffffff, dev_addr = 0x00000080fbfff000 swiotlb: coherent allocation failed for device 0000:02:00.0 size=4096 CPU: 0 PID: 1062 Comm: systemd-udevd Not tainted 4.8.0+ #35 Hardware name: AMD Seattle/Seattle, BIOS 10:53:24 Oct 13 2016 on systems without memory that is 32-bit addressable by PCI devices. Signed-off-by: Ard Biesheuvel Acked-by: Francois Romieu Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index e55638c..bf000d8 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -8273,7 +8273,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if ((sizeof(dma_addr_t) > 4) && (use_dac == 1 || (use_dac == -1 && pci_is_pcie(pdev) && tp->mac_version >= RTL_GIGA_MAC_VER_18)) && - !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { + !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) && + !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { /* CPlusCmd Dual Access Cycle is only needed for non-PCIe */ if (!pci_is_pcie(pdev)) -- cgit v0.10.2 From 93966b715b32a783a1641f5a385901bbfab04733 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Fri, 14 Oct 2016 14:14:35 -0500 Subject: net: qcom/emac: disable interrupts before calling phy_disconnect There is a race condition that can occur if EMAC interrupts are enabled when phy_disconnect() is called. phy_disconnect() sets adjust_link to NULL. When an interrupt occurs, the ISR might call phy_mac_interrupt(), which wakes up the workqueue function phy_state_machine(). This function might reference adjust_link, thereby causing a null pointer exception. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index e97968e..6fb3bee 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -1021,14 +1021,18 @@ void emac_mac_down(struct emac_adapter *adpt) napi_disable(&adpt->rx_q.napi); phy_stop(adpt->phydev); - phy_disconnect(adpt->phydev); - /* disable mac irq */ + /* Interrupts must be disabled before the PHY is disconnected, to + * avoid a race condition where adjust_link is null when we get + * an interrupt. + */ writel(DIS_INT, adpt->base + EMAC_INT_STATUS); writel(0, adpt->base + EMAC_INT_MASK); synchronize_irq(adpt->irq.irq); free_irq(adpt->irq.irq, &adpt->irq); + phy_disconnect(adpt->phydev); + emac_mac_reset(adpt); emac_tx_q_descs_free(adpt); -- cgit v0.10.2 From 50756ebecf69276b8a908409fa32c123bb12420b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2016 22:26:11 +0300 Subject: stmmac: fix an error code in stmmac_ptp_register() PTR_ERR(NULL) is success. We have to preserve the error code earlier. Fixes: 7086605a6ab5 ("stmmac: fix error check when init ptp") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 289d527..5d61fb2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -185,8 +185,10 @@ int stmmac_ptp_register(struct stmmac_priv *priv) priv->ptp_clock = ptp_clock_register(&priv->ptp_clock_ops, priv->device); if (IS_ERR(priv->ptp_clock)) { + int ret = PTR_ERR(priv->ptp_clock); + priv->ptp_clock = NULL; - return PTR_ERR(priv->ptp_clock); + return ret; } spin_lock_init(&priv->ptp_lock); -- cgit v0.10.2 From fb5c6cfaec126d9a96b9dd471d4711bf4c737a6f Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 15 Oct 2016 00:01:20 +0300 Subject: vmxnet3: avoid assumption about invalid dma_pa in vmxnet3_set_mc() vmxnet3_set_mc() checks new_table_pa returned by dma_map_single() with dma_mapping_error(), but even there it assumes zero is invalid pa (it assumes dma_mapping_error(...,0) returns true if new_table is NULL). The patch adds an explicit variable to track status of new_table_pa. Found by Linux Driver Verification project (linuxtesting.org). v2: use "bool" and "true"/"false" for boolean variables. Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index b5554f2..ef83ae3 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -2279,6 +2279,7 @@ vmxnet3_set_mc(struct net_device *netdev) &adapter->shared->devRead.rxFilterConf; u8 *new_table = NULL; dma_addr_t new_table_pa = 0; + bool new_table_pa_valid = false; u32 new_mode = VMXNET3_RXM_UCAST; if (netdev->flags & IFF_PROMISC) { @@ -2307,13 +2308,15 @@ vmxnet3_set_mc(struct net_device *netdev) new_table, sz, PCI_DMA_TODEVICE); + if (!dma_mapping_error(&adapter->pdev->dev, + new_table_pa)) { + new_mode |= VMXNET3_RXM_MCAST; + new_table_pa_valid = true; + rxConf->mfTablePA = cpu_to_le64( + new_table_pa); + } } - - if (!dma_mapping_error(&adapter->pdev->dev, - new_table_pa)) { - new_mode |= VMXNET3_RXM_MCAST; - rxConf->mfTablePA = cpu_to_le64(new_table_pa); - } else { + if (!new_table_pa_valid) { netdev_info(netdev, "failed to copy mcast list, setting ALL_MULTI\n"); new_mode |= VMXNET3_RXM_ALL_MULTI; @@ -2338,7 +2341,7 @@ vmxnet3_set_mc(struct net_device *netdev) VMXNET3_CMD_UPDATE_MAC_FILTERS); spin_unlock_irqrestore(&adapter->cmd_lock, flags); - if (new_table_pa) + if (new_table_pa_valid) dma_unmap_single(&adapter->pdev->dev, new_table_pa, rxConf->mfTableLen, PCI_DMA_TODEVICE); kfree(new_table); -- cgit v0.10.2 From 9ca488dd53088d4fcc97258aeeccf21f63b7da1e Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 29 Sep 2016 17:22:58 +0200 Subject: batman-adv: Modify neigh_list only with rcu-list functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The batadv_hard_iface::neigh_list is accessed via rcu based primitives. Thus all operations done on it have to fulfill the requirements by RCU. So using non-RCU mechanisms like hlist_add_head is not allowed because it misses the barriers required to protect concurrent readers when accessing the data behind the pointer. Fixes: cef63419f7db ("batman-adv: add list of unique single hop neighbors per hard-interface") Signed-off-by: Sven Eckelmann Acked-by: Linus Lüssing Signed-off-by: Simon Wunderlich diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 5f3bfc4..7c8d160 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -544,7 +544,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, if (bat_priv->algo_ops->neigh.hardif_init) bat_priv->algo_ops->neigh.hardif_init(hardif_neigh); - hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list); + hlist_add_head_rcu(&hardif_neigh->list, &hard_iface->neigh_list); out: spin_unlock_bh(&hard_iface->neigh_list_lock); -- cgit v0.10.2 From 611b975b5c868639b7cb0e89e22d1c8c561bf3a1 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 16 Jul 2016 21:30:20 +0200 Subject: batman-adv: Add BATADV_DBG_TP_METER to BATADV_DBG_ALL The BATADV_DBG_ALL has to contain the bit of BATADV_DBG_TP_METER to really support all available debug messages. Fixes: 33a3bb4a3345 ("batman-adv: throughput meter implementation") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h index e0e1a88..d2905a8 100644 --- a/net/batman-adv/log.h +++ b/net/batman-adv/log.h @@ -63,7 +63,7 @@ enum batadv_dbg_level { BATADV_DBG_NC = BIT(5), BATADV_DBG_MCAST = BIT(6), BATADV_DBG_TP_METER = BIT(7), - BATADV_DBG_ALL = 127, + BATADV_DBG_ALL = 255, }; #ifdef CONFIG_BATMAN_ADV_DEBUG -- cgit v0.10.2 From f4a067f9ffca603b45f7e82ddd2ba50e5904cea3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 17 Oct 2016 15:05:33 +0100 Subject: mac80211: move struct aead_req off the stack Some crypto implementations (such as the generic CCM wrapper in crypto/) use scatterlists to map fields of private data in their struct aead_req. This means these data structures cannot live in the vmalloc area, which means that they cannot live on the stack (with CONFIG_VMAP_STACK.) This currently occurs only with the generic software implementation, but the private data and usage is implementation specific, so move the whole data structures off the stack into heap by allocating every time we need to use them. In addition, take care not to put any of our own stack allocations into scatterlists. This involves reserving some extra room when allocating the aead_request structures, and referring to those allocations in the scatter- lists (while copying the data from the stack before the crypto operation) Signed-off-by: Ard Biesheuvel Signed-off-by: Johannes Berg diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 7663c28..a4e0d59 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -18,21 +18,24 @@ #include "key.h" #include "aes_ccm.h" -void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic, - size_t mic_len) +int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic, + size_t mic_len) { struct scatterlist sg[3]; + struct aead_request *aead_req; + int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); + u8 *__aad; - char aead_req_data[sizeof(struct aead_request) + - crypto_aead_reqsize(tfm)] - __aligned(__alignof__(struct aead_request)); - struct aead_request *aead_req = (void *) aead_req_data; + aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC); + if (!aead_req) + return -ENOMEM; - memset(aead_req, 0, sizeof(aead_req_data)); + __aad = (u8 *)aead_req + reqsize; + memcpy(__aad, aad, CCM_AAD_LEN); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad)); + sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, mic_len); @@ -41,6 +44,9 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, aead_request_set_ad(aead_req, sg[0].length); crypto_aead_encrypt(aead_req); + kzfree(aead_req); + + return 0; } int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, @@ -48,18 +54,23 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t mic_len) { struct scatterlist sg[3]; - char aead_req_data[sizeof(struct aead_request) + - crypto_aead_reqsize(tfm)] - __aligned(__alignof__(struct aead_request)); - struct aead_request *aead_req = (void *) aead_req_data; + struct aead_request *aead_req; + int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); + u8 *__aad; + int err; if (data_len == 0) return -EINVAL; - memset(aead_req, 0, sizeof(aead_req_data)); + aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC); + if (!aead_req) + return -ENOMEM; + + __aad = (u8 *)aead_req + reqsize; + memcpy(__aad, aad, CCM_AAD_LEN); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad)); + sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, mic_len); @@ -67,7 +78,10 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, aead_request_set_crypt(aead_req, sg, sg, data_len + mic_len, b_0); aead_request_set_ad(aead_req, sg[0].length); - return crypto_aead_decrypt(aead_req); + err = crypto_aead_decrypt(aead_req); + kzfree(aead_req); + + return err; } struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h index 6a73d1e..fcd3254 100644 --- a/net/mac80211/aes_ccm.h +++ b/net/mac80211/aes_ccm.h @@ -12,12 +12,14 @@ #include +#define CCM_AAD_LEN 32 + struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], size_t key_len, size_t mic_len); -void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic, - size_t mic_len); +int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic, + size_t mic_len); int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, u8 *data, size_t data_len, u8 *mic, size_t mic_len); diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c index 3afe361f..8a4397c 100644 --- a/net/mac80211/aes_gcm.c +++ b/net/mac80211/aes_gcm.c @@ -15,20 +15,23 @@ #include "key.h" #include "aes_gcm.h" -void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic) +int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic) { struct scatterlist sg[3]; + struct aead_request *aead_req; + int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); + u8 *__aad; - char aead_req_data[sizeof(struct aead_request) + - crypto_aead_reqsize(tfm)] - __aligned(__alignof__(struct aead_request)); - struct aead_request *aead_req = (void *)aead_req_data; + aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC); + if (!aead_req) + return -ENOMEM; - memset(aead_req, 0, sizeof(aead_req_data)); + __aad = (u8 *)aead_req + reqsize; + memcpy(__aad, aad, GCM_AAD_LEN); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad)); + sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN); @@ -37,24 +40,31 @@ void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, aead_request_set_ad(aead_req, sg[0].length); crypto_aead_encrypt(aead_req); + kzfree(aead_req); + return 0; } int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, u8 *data, size_t data_len, u8 *mic) { struct scatterlist sg[3]; - char aead_req_data[sizeof(struct aead_request) + - crypto_aead_reqsize(tfm)] - __aligned(__alignof__(struct aead_request)); - struct aead_request *aead_req = (void *)aead_req_data; + struct aead_request *aead_req; + int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); + u8 *__aad; + int err; if (data_len == 0) return -EINVAL; - memset(aead_req, 0, sizeof(aead_req_data)); + aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC); + if (!aead_req) + return -ENOMEM; + + __aad = (u8 *)aead_req + reqsize; + memcpy(__aad, aad, GCM_AAD_LEN); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad)); + sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN); @@ -63,7 +73,10 @@ int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, data_len + IEEE80211_GCMP_MIC_LEN, j_0); aead_request_set_ad(aead_req, sg[0].length); - return crypto_aead_decrypt(aead_req); + err = crypto_aead_decrypt(aead_req); + kzfree(aead_req); + + return err; } struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h index 1347fda..55aed53 100644 --- a/net/mac80211/aes_gcm.h +++ b/net/mac80211/aes_gcm.h @@ -11,8 +11,10 @@ #include -void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic); +#define GCM_AAD_LEN 32 + +int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic); int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, u8 *data, size_t data_len, u8 *mic); struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], diff --git a/net/mac80211/aes_gmac.c b/net/mac80211/aes_gmac.c index 3ddd927..bd72a86 100644 --- a/net/mac80211/aes_gmac.c +++ b/net/mac80211/aes_gmac.c @@ -17,28 +17,27 @@ #include "key.h" #include "aes_gmac.h" -#define GMAC_MIC_LEN 16 -#define GMAC_NONCE_LEN 12 -#define AAD_LEN 20 - int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, const u8 *data, size_t data_len, u8 *mic) { struct scatterlist sg[4]; - char aead_req_data[sizeof(struct aead_request) + - crypto_aead_reqsize(tfm)] - __aligned(__alignof__(struct aead_request)); - struct aead_request *aead_req = (void *)aead_req_data; - u8 zero[GMAC_MIC_LEN], iv[AES_BLOCK_SIZE]; + u8 *zero, *__aad, iv[AES_BLOCK_SIZE]; + struct aead_request *aead_req; + int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); if (data_len < GMAC_MIC_LEN) return -EINVAL; - memset(aead_req, 0, sizeof(aead_req_data)); + aead_req = kzalloc(reqsize + GMAC_MIC_LEN + GMAC_AAD_LEN, GFP_ATOMIC); + if (!aead_req) + return -ENOMEM; + + zero = (u8 *)aead_req + reqsize; + __aad = zero + GMAC_MIC_LEN; + memcpy(__aad, aad, GMAC_AAD_LEN); - memset(zero, 0, GMAC_MIC_LEN); sg_init_table(sg, 4); - sg_set_buf(&sg[0], aad, AAD_LEN); + sg_set_buf(&sg[0], __aad, GMAC_AAD_LEN); sg_set_buf(&sg[1], data, data_len - GMAC_MIC_LEN); sg_set_buf(&sg[2], zero, GMAC_MIC_LEN); sg_set_buf(&sg[3], mic, GMAC_MIC_LEN); @@ -49,9 +48,10 @@ int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, aead_request_set_tfm(aead_req, tfm); aead_request_set_crypt(aead_req, sg, sg, 0, iv); - aead_request_set_ad(aead_req, AAD_LEN + data_len); + aead_request_set_ad(aead_req, GMAC_AAD_LEN + data_len); crypto_aead_encrypt(aead_req); + kzfree(aead_req); return 0; } diff --git a/net/mac80211/aes_gmac.h b/net/mac80211/aes_gmac.h index d328204..32e6442 100644 --- a/net/mac80211/aes_gmac.h +++ b/net/mac80211/aes_gmac.h @@ -11,6 +11,10 @@ #include +#define GMAC_AAD_LEN 20 +#define GMAC_MIC_LEN 16 +#define GMAC_NONCE_LEN 12 + struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[], size_t key_len); int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index b48c1e1..42ce9bd 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -405,7 +405,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, u8 *pos; u8 pn[6]; u64 pn64; - u8 aad[2 * AES_BLOCK_SIZE]; + u8 aad[CCM_AAD_LEN]; u8 b_0[AES_BLOCK_SIZE]; if (info->control.hw_key && @@ -461,10 +461,8 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, pos += IEEE80211_CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, b_0, aad); - ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, - skb_put(skb, mic_len), mic_len); - - return 0; + return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, + skb_put(skb, mic_len), mic_len); } @@ -639,7 +637,7 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) u8 *pos; u8 pn[6]; u64 pn64; - u8 aad[2 * AES_BLOCK_SIZE]; + u8 aad[GCM_AAD_LEN]; u8 j_0[AES_BLOCK_SIZE]; if (info->control.hw_key && @@ -696,10 +694,8 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) pos += IEEE80211_GCMP_HDR_LEN; gcmp_special_blocks(skb, pn, j_0, aad); - ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len, - skb_put(skb, IEEE80211_GCMP_MIC_LEN)); - - return 0; + return ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len, + skb_put(skb, IEEE80211_GCMP_MIC_LEN)); } ieee80211_tx_result @@ -1123,9 +1119,9 @@ ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx) struct ieee80211_key *key = tx->key; struct ieee80211_mmie_16 *mmie; struct ieee80211_hdr *hdr; - u8 aad[20]; + u8 aad[GMAC_AAD_LEN]; u64 pn64; - u8 nonce[12]; + u8 nonce[GMAC_NONCE_LEN]; if (WARN_ON(skb_queue_len(&tx->skbs) != 1)) return TX_DROP; @@ -1171,7 +1167,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_key *key = rx->key; struct ieee80211_mmie_16 *mmie; - u8 aad[20], mic[16], ipn[6], nonce[12]; + u8 aad[GMAC_AAD_LEN], mic[GMAC_MIC_LEN], ipn[6], nonce[GMAC_NONCE_LEN]; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; if (!ieee80211_is_mgmt(hdr->frame_control)) -- cgit v0.10.2 From a04a480d4392ea6efd117be2de564117b2a009c0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 16 Oct 2016 20:02:52 -0700 Subject: net: Require exact match for TCP socket lookups if dif is l3mdev Currently, socket lookups for l3mdev (vrf) use cases can match a socket that is bound to a port but not a device (ie., a global socket). If the sysctl tcp_l3mdev_accept is not set this leads to ack packets going out based on the main table even though the packet came in from an L3 domain. The end result is that the connection does not establish creating confusion for users since the service is running and a socket shows in ss output. Fix by requiring an exact dif to sk_bound_dev_if match if the skb came through an interface enslaved to an l3mdev device and the tcp_l3mdev_accept is not set. skb's through an l3mdev interface are marked by setting a flag in inet{6}_skb_parm. The IPv6 variant is already set; this patch adds the flag for IPv4. Using an skb flag avoids a device lookup on the dif. The flag is set in the VRF driver using the IP{6}CB macros. For IPv4, the inet_skb_parm struct is moved in the cb per commit 971f10eca186, so the match function in the TCP stack needs to use TCP_SKB_CB. For IPv6, the move is done after the socket lookup, so IP6CB is used. The flags field in inet_skb_parm struct needs to be increased to add another flag. There is currently a 1-byte hole following the flags, so it can be expanded to u16 without increasing the size of the struct. Fixes: 193125dbd8eb ("net: Introduce VRF device driver") Signed-off-by: David Ahern Signed-off-by: David S. Miller diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 85c271c..820de6a 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -956,6 +956,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, if (skb->pkt_type == PACKET_LOOPBACK) { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; + IP6CB(skb)->flags |= IP6SKB_L3SLAVE; skb->pkt_type = PACKET_HOST; goto out; } @@ -996,6 +997,7 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; + IPCB(skb)->flags |= IPSKB_L3SLAVE; /* loopback traffic; do not push through packet taps again. * Reset pkt_type for upper layers to process skb diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7e9a789..ca1ad9e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -123,12 +123,12 @@ struct inet6_skb_parm { }; #if defined(CONFIG_NET_L3_MASTER_DEV) -static inline bool skb_l3mdev_slave(__u16 flags) +static inline bool ipv6_l3mdev_skb(__u16 flags) { return flags & IP6SKB_L3SLAVE; } #else -static inline bool skb_l3mdev_slave(__u16 flags) +static inline bool ipv6_l3mdev_skb(__u16 flags) { return false; } @@ -139,11 +139,22 @@ static inline bool skb_l3mdev_slave(__u16 flags) static inline int inet6_iif(const struct sk_buff *skb) { - bool l3_slave = skb_l3mdev_slave(IP6CB(skb)->flags); + bool l3_slave = ipv6_l3mdev_skb(IP6CB(skb)->flags); return l3_slave ? skb->skb_iif : IP6CB(skb)->iif; } +/* can not be used in TCP layer after tcp_v6_fill_cb */ +static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb) +{ +#if defined(CONFIG_NET_L3_MASTER_DEV) + if (!net->ipv4.sysctl_tcp_l3mdev_accept && + ipv6_l3mdev_skb(IP6CB(skb)->flags)) + return true; +#endif + return false; +} + struct tcp6_request_sock { struct tcp_request_sock tcp6rsk_tcp; }; diff --git a/include/net/ip.h b/include/net/ip.h index bc43c0f..c9d0798 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -38,7 +38,7 @@ struct sock; struct inet_skb_parm { int iif; struct ip_options opt; /* Compiled IP options */ - unsigned char flags; + u16 flags; #define IPSKB_FORWARDED BIT(0) #define IPSKB_XFRM_TUNNEL_SIZE BIT(1) @@ -48,10 +48,16 @@ struct inet_skb_parm { #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) #define IPSKB_FRAG_SEGS BIT(7) +#define IPSKB_L3SLAVE BIT(8) u16 frag_max_size; }; +static inline bool ipv4_l3mdev_skb(u16 flags) +{ + return !!(flags & IPSKB_L3SLAVE); +} + static inline unsigned int ip_hdrlen(const struct sk_buff *skb) { return ip_hdr(skb)->ihl * 4; diff --git a/include/net/tcp.h b/include/net/tcp.h index f83b7f2..5b82d4d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -794,12 +794,23 @@ struct tcp_skb_cb { */ static inline int tcp_v6_iif(const struct sk_buff *skb) { - bool l3_slave = skb_l3mdev_slave(TCP_SKB_CB(skb)->header.h6.flags); + bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif; } #endif +/* TCP_SKB_CB reference means this can not be used from early demux */ +static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (!net->ipv4.sysctl_tcp_l3mdev_accept && + ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + return true; +#endif + return false; +} + /* Due to TSO, an SKB can be composed of multiple actual * packets. To keep these tracked properly, we use this. */ diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 77c20a4..ca97835 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -25,6 +25,7 @@ #include #include #include +#include #include static u32 inet_ehashfn(const struct net *net, const __be32 laddr, @@ -172,7 +173,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port); static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const __be32 daddr, - const int dif) + const int dif, bool exact_dif) { int score = -1; struct inet_sock *inet = inet_sk(sk); @@ -186,7 +187,7 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score += 4; } - if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if || exact_dif) { if (sk->sk_bound_dev_if != dif) return -1; score += 4; @@ -215,11 +216,12 @@ struct sock *__inet_lookup_listener(struct net *net, unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; int score, hiscore = 0, matches = 0, reuseport = 0; + bool exact_dif = inet_exact_dif_match(net, skb); struct sock *sk, *result = NULL; u32 phash = 0; sk_for_each_rcu(sk, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, dif); + score = compute_score(sk, net, hnum, daddr, dif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; if (reuseport) { diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 00cf28a..2fd0374 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -96,7 +96,7 @@ EXPORT_SYMBOL(__inet6_lookup_established); static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const struct in6_addr *daddr, - const int dif) + const int dif, bool exact_dif) { int score = -1; @@ -109,7 +109,7 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score++; } - if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if || exact_dif) { if (sk->sk_bound_dev_if != dif) return -1; score++; @@ -131,11 +131,12 @@ struct sock *inet6_lookup_listener(struct net *net, unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; int score, hiscore = 0, matches = 0, reuseport = 0; + bool exact_dif = inet6_exact_dif_match(net, skb); struct sock *sk, *result = NULL; u32 phash = 0; sk_for_each(sk, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, dif); + score = compute_score(sk, net, hnum, daddr, dif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; if (reuseport) { -- cgit v0.10.2 From 9a0b1e8ba4061778897b544afc898de2163382f7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 15 Oct 2016 17:50:49 +0200 Subject: net: pktgen: remove rcu locking in pktgen_change_name() After Jesper commit back in linux-3.18, we trigger a lockdep splat in proc_create_data() while allocating memory from pktgen_change_name(). This patch converts t->if_lock to a mutex, since it is now only used from control path, and adds proper locking to pktgen_change_name() 1) pktgen_thread_lock to protect the outer loop (iterating threads) 2) t->if_lock to protect the inner loop (iterating devices) Note that before Jesper patch, pktgen_change_name() was lacking proper protection, but lockdep was not able to detect the problem. Fixes: 8788370a1d4b ("pktgen: RCU-ify "if_list" to remove lock in next_to_run()") Reported-by: John Sperbeck Signed-off-by: Eric Dumazet Cc: Jesper Dangaard Brouer Signed-off-by: David S. Miller diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 5219a9e..306b8f0 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -216,8 +216,8 @@ #define M_QUEUE_XMIT 2 /* Inject packet into qdisc */ /* If lock -- protects updating of if_list */ -#define if_lock(t) spin_lock(&(t->if_lock)); -#define if_unlock(t) spin_unlock(&(t->if_lock)); +#define if_lock(t) mutex_lock(&(t->if_lock)); +#define if_unlock(t) mutex_unlock(&(t->if_lock)); /* Used to help with determining the pkts on receive */ #define PKTGEN_MAGIC 0xbe9be955 @@ -423,7 +423,7 @@ struct pktgen_net { }; struct pktgen_thread { - spinlock_t if_lock; /* for list of devices */ + struct mutex if_lock; /* for list of devices */ struct list_head if_list; /* All device here */ struct list_head th_list; struct task_struct *tsk; @@ -2010,11 +2010,13 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d { struct pktgen_thread *t; + mutex_lock(&pktgen_thread_lock); + list_for_each_entry(t, &pn->pktgen_threads, th_list) { struct pktgen_dev *pkt_dev; - rcu_read_lock(); - list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { + if_lock(t); + list_for_each_entry(pkt_dev, &t->if_list, list) { if (pkt_dev->odev != dev) continue; @@ -2029,8 +2031,9 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d dev->name); break; } - rcu_read_unlock(); + if_unlock(t); } + mutex_unlock(&pktgen_thread_lock); } static int pktgen_device_event(struct notifier_block *unused, @@ -3762,7 +3765,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) return -ENOMEM; } - spin_lock_init(&t->if_lock); + mutex_init(&t->if_lock); t->cpu = cpu; INIT_LIST_HEAD(&t->if_list); -- cgit v0.10.2 From 67b11e2ea704f99953f18585609df17b6e189e53 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 16 Oct 2016 23:54:03 +0100 Subject: cxgb4: fix memory leak of qe on error exit path A memory leak of qe occurs when t4_sched_queue_unbind fails, so fix this by free'ing qe on the error exit path. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c index 539de76..cbd68a8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sched.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c @@ -210,8 +210,10 @@ static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p) /* Unbind queue from any existing class */ err = t4_sched_queue_unbind(pi, p); - if (err) + if (err) { + t4_free_mem(qe); goto out; + } /* Bind queue to specified class */ memset(qe, 0, sizeof(*qe)); -- cgit v0.10.2 From 1b203c138c5a0d78aa34d1c4346ff6b32ac74869 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 6 Oct 2016 15:40:14 +0200 Subject: netfilter: xt_hashlimit: Add missing ULL suffixes for 64-bit constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32-bit (e.g. with m68k-linux-gnu-gcc-4.1): net/netfilter/xt_hashlimit.c: In function ‘user2credits’: net/netfilter/xt_hashlimit.c:476: warning: integer constant is too large for ‘long’ type ... net/netfilter/xt_hashlimit.c:478: warning: integer constant is too large for ‘long’ type ... net/netfilter/xt_hashlimit.c:480: warning: integer constant is too large for ‘long’ type ... net/netfilter/xt_hashlimit.c: In function ‘rateinfo_recalc’: net/netfilter/xt_hashlimit.c:513: warning: integer constant is too large for ‘long’ type Fixes: 11d5f15723c9f39d ("netfilter: xt_hashlimit: Create revision 2 to support higher pps rates") Signed-off-by: Geert Uytterhoeven Acked-by: Vishwanath Pai Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 2fab0c6..b89b688 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -431,7 +431,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie. */ #define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24)) -#define MAX_CPJ (0xFFFFFFFFFFFFFFFF / (HZ*60*60*24)) +#define MAX_CPJ (0xFFFFFFFFFFFFFFFFULL / (HZ*60*60*24)) /* Repeated shift and or gives us all 1s, final shift and add 1 gives * us the power of 2 below the theoretical max, so GCC simply does a @@ -473,7 +473,7 @@ static u64 user2credits(u64 user, int revision) return div64_u64(user * HZ * CREDITS_PER_JIFFY_v1, XT_HASHLIMIT_SCALE); } else { - if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) + if (user > 0xFFFFFFFFFFFFFFFFULL / (HZ*CREDITS_PER_JIFFY)) return div64_u64(user, XT_HASHLIMIT_SCALE_v2) * HZ * CREDITS_PER_JIFFY; -- cgit v0.10.2 From a8b1e36d0d1d6f51490e7adce35367ed6adb10e7 Mon Sep 17 00:00:00 2001 From: "Anders K. Pedersen" Date: Sun, 9 Oct 2016 13:49:02 +0000 Subject: netfilter: nft_dynset: fix element timeout for HZ != 1000 With HZ=100 element timeout in dynamic sets (i.e. flow tables) is 10 times higher than configured. Add proper conversion to/from jiffies, when interacting with userspace. I tested this on Linux 4.8.1, and it applies cleanly to current nf and nf-next trees. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Anders K. Pedersen Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index e3b83c3..517f087 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -158,7 +158,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; - timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT])); + timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( + tb[NFTA_DYNSET_TIMEOUT]))); } priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); @@ -246,7 +247,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) goto nla_put_failure; - if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout), + if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, + cpu_to_be64(jiffies_to_msecs(priv->timeout)), NFTA_DYNSET_PAD)) goto nla_put_failure; if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) -- cgit v0.10.2 From 4f76de5f237615643d2243582cee331815312ad0 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 10 Oct 2016 12:18:23 +0200 Subject: netfilter: conntrack: remove obsolete sysctl (nf_conntrack_events_retry_timeout) This entry has been removed in commit 9500507c6138. Fixes: 9500507c6138 ("netfilter: conntrack: remove timer from ecache extension") Signed-off-by: Nicolas Dichtel Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt index 4fb51d3..399e4e8 100644 --- a/Documentation/networking/nf_conntrack-sysctl.txt +++ b/Documentation/networking/nf_conntrack-sysctl.txt @@ -33,24 +33,6 @@ nf_conntrack_events - BOOLEAN If this option is enabled, the connection tracking code will provide userspace with connection tracking events via ctnetlink. -nf_conntrack_events_retry_timeout - INTEGER (seconds) - default 15 - - This option is only relevant when "reliable connection tracking - events" are used. Normally, ctnetlink is "lossy", that is, - events are normally dropped when userspace listeners can't keep up. - - Userspace can request "reliable event mode". When this mode is - active, the conntrack will only be destroyed after the event was - delivered. If event delivery fails, the kernel periodically - re-tries to send the event to userspace. - - This is the maximum interval the kernel should use when re-trying - to deliver the destroy event. - - A higher number means there will be fewer delivery retries and it - will take longer for a backlog to be processed. - nf_conntrack_expect_max - INTEGER Maximum size of expectation table. Default value is nf_conntrack_buckets / 256. Minimum is 1. -- cgit v0.10.2 From 6d19375b58763fefc2f215fb45117d3353ced888 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 11 Oct 2016 21:03:45 +0800 Subject: netfilter: xt_NFLOG: fix unexpected truncated packet Justin and Chris spotted that iptables NFLOG target was broken when they upgraded the kernel to 4.8: "ulogd-2.0.5- IPs are no longer logged" or "results in segfaults in ulogd-2.0.5". Because "struct nf_loginfo li;" is a local variable, and flags will be filled with garbage value, not inited to zero. So if it contains 0x1, packets will not be logged to the userspace anymore. Fixes: 7643507fe8b5 ("netfilter: xt_NFLOG: nflog-range does not truncate packets") Reported-by: Justin Piszcz Reported-by: Chris Caputo Tested-by: Chris Caputo Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 018eed7..8668a5c 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -32,6 +32,7 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; + li.u.ulog.flags = 0; if (info->flags & XT_NFLOG_F_COPY_LEN) li.u.ulog.flags |= NF_LOG_F_COPY_LEN; -- cgit v0.10.2 From f434ed0a00b232efc9843c869d05048c7cd79bb7 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 12 Oct 2016 21:09:22 +0800 Subject: netfilter: xt_ipcomp: add "ip[6]t_ipcomp" module alias name Otherwise, user cannot add related rules if xt_ipcomp.ko is not loaded: # iptables -A OUTPUT -p 108 -m ipcomp --ipcompspi 1 iptables: No chain/target/match by that name. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c index 89d5310..000e703 100644 --- a/net/netfilter/xt_ipcomp.c +++ b/net/netfilter/xt_ipcomp.c @@ -26,6 +26,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Fan Du "); MODULE_DESCRIPTION("Xtables: IPv4/6 IPsec-IPComp SPI match"); +MODULE_ALIAS("ipt_ipcomp"); +MODULE_ALIAS("ip6t_ipcomp"); /* Returns 1 if the spi is matched by the range, 0 otherwise */ static inline bool -- cgit v0.10.2 From 5751e175c60be1b4113d1e9bfa011b58ec01f104 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 12 Oct 2016 21:10:45 +0800 Subject: netfilter: nft_hash: add missing NFTA_HASH_OFFSET's nla_policy Missing the nla_policy description will also miss the validation check in kernel. Fixes: 70ca767ea1b2 ("netfilter: nft_hash: Add hash offset value") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 09473b4..baf694d 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -44,6 +44,7 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_LEN] = { .type = NLA_U32 }, [NFTA_HASH_MODULUS] = { .type = NLA_U32 }, [NFTA_HASH_SEED] = { .type = NLA_U32 }, + [NFTA_HASH_OFFSET] = { .type = NLA_U32 }, }; static int nft_hash_init(const struct nft_ctx *ctx, -- cgit v0.10.2 From 09525a09ad3099efd9ba49b0b90bddc350d6b53a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Oct 2016 12:14:29 +0300 Subject: netfilter: nf_tables: underflow in nft_parse_u32_check() We don't want to allow negatives here. Fixes: 36b701fae12a ('netfilter: nf_tables: validate maximum value of u32 netlink attributes') Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b70d3ea..24db222 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4423,7 +4423,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, */ unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) { - int val; + u32 val; val = ntohl(nla_get_be32(attr)); if (val > max) -- cgit v0.10.2 From 21a9e0f1568eaa0aad970c06e4cc8d77de8d9fa1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Oct 2016 09:09:12 +0300 Subject: netfilter: nft_exthdr: fix error handling in nft_exthdr_init() "err" needs to be signed for the error handling to work. Fixes: 36b701fae12a ('netfilter: nf_tables: validate maximum value of u32 netlink attributes') Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index a84cf3d..47beb3a 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -59,7 +59,8 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); - u32 offset, len, err; + u32 offset, len; + int err; if (tb[NFTA_EXTHDR_DREG] == NULL || tb[NFTA_EXTHDR_TYPE] == NULL || -- cgit v0.10.2 From ccca6607c545534e5b74ef04e0f2f6ba11344be4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 13 Oct 2016 08:42:17 +0200 Subject: netfilter: nft_range: validate operation netlink attribute Use nft_parse_u32_check() to make sure we don't get a value over the unsigned 8-bit integer. Moreover, make sure this value doesn't go over the two supported range comparison modes. Fixes: 9286c2eb1fda ("netfilter: nft_range: validate operation netlink attribute") Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index c6d5358..9bc4586 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -59,6 +59,7 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr struct nft_range_expr *priv = nft_expr_priv(expr); struct nft_data_desc desc_from, desc_to; int err; + u32 op; err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from), &desc_from, tb[NFTA_RANGE_FROM_DATA]); @@ -80,7 +81,20 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr if (err < 0) goto err2; - priv->op = ntohl(nla_get_be32(tb[NFTA_RANGE_OP])); + err = nft_parse_u32_check(tb[NFTA_RANGE_OP], U8_MAX, &op); + if (err < 0) + goto err2; + + switch (op) { + case NFT_RANGE_EQ: + case NFT_RANGE_NEQ: + break; + default: + err = -EINVAL; + goto err2; + } + + priv->op = op; priv->len = desc_from.len; return 0; err2: -- cgit v0.10.2 From fc971a2f2383950ddedb1e4896c5ead5d2357029 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 17 Oct 2016 11:05:40 -0300 Subject: net: nps_enet: Fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/net/ethernet/ezchip/nps_enet.ko | grep alias $ After this patch: $ modinfo drivers/net/ethernet/ezchip/nps_enet.ko | grep alias alias: of:N*T*Cezchip,nps-mgt-enetC* alias: of:N*T*Cezchip,nps-mgt-enet Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c index f928e6f..223f35c 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.c +++ b/drivers/net/ethernet/ezchip/nps_enet.c @@ -669,6 +669,7 @@ static const struct of_device_id nps_enet_dt_ids[] = { { .compatible = "ezchip,nps-mgt-enet" }, { /* Sentinel */ } }; +MODULE_DEVICE_TABLE(of, nps_enet_dt_ids); static struct platform_driver nps_enet_driver = { .probe = nps_enet_probe, -- cgit v0.10.2 From 2fa3e317e6c89fc89e96a48308f6791f74fcea3c Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 17 Oct 2016 11:05:41 -0300 Subject: net: ethernet: nb8800: Fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ $ modinfo drivers/net/ethernet/aurora/nb8800.ko | grep alias $ After this patch: $ modinfo drivers/net/ethernet/aurora/nb8800.ko | grep alias alias: of:N*T*Csigma,smp8734-ethernetC* alias: of:N*T*Csigma,smp8734-ethernet alias: of:N*T*Csigma,smp8642-ethernetC* alias: of:N*T*Csigma,smp8642-ethernet alias: of:N*T*Caurora,nb8800C* alias: of:N*T*Caurora,nb8800 Signed-off-by: Javier Martinez Canillas Acked-by: Mans Rullgard Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c index b047fd6..00c38bf 100644 --- a/drivers/net/ethernet/aurora/nb8800.c +++ b/drivers/net/ethernet/aurora/nb8800.c @@ -1358,6 +1358,7 @@ static const struct of_device_id nb8800_dt_ids[] = { }, { } }; +MODULE_DEVICE_TABLE(of, nb8800_dt_ids); static int nb8800_probe(struct platform_device *pdev) { -- cgit v0.10.2 From a7deb924d33b5688b11e34e84c88f2591f933e3a Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 17 Oct 2016 11:05:42 -0300 Subject: net: hns: Fix hns_dsaf module autoload for OF registration If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/net/ethernet/hisilicon/hns/hns_dsaf.ko | grep alias alias: acpi*:HISI00B2:* alias: acpi*:HISI00B1:* After this patch: $ modinfo drivers/net/ethernet/hisilicon/hns/hns_dsaf.ko | grep alias alias: acpi*:HISI00B2:* alias: acpi*:HISI00B1:* alias: of:N*T*Chisilicon,hns-dsaf-v2C* alias: of:N*T*Chisilicon,hns-dsaf-v2 alias: of:N*T*Chisilicon,hns-dsaf-v1C* alias: of:N*T*Chisilicon,hns-dsaf-v1 Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 8e5b3f5..66b99d2 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2761,6 +2761,7 @@ static const struct of_device_id g_dsaf_match[] = { {.compatible = "hisilicon,hns-dsaf-v2"}, {} }; +MODULE_DEVICE_TABLE(of, g_dsaf_match); static struct platform_driver g_dsaf_driver = { .probe = hns_dsaf_probe, -- cgit v0.10.2 From 7097268509a7d6756d14ab422af7446f9bf53221 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 17 Oct 2016 11:05:43 -0300 Subject: net: qcom/emac: Fix module autoload for OF registration If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/net/ethernet/qualcomm/emac/qcom-emac.ko | grep alias alias: platform:qcom-emac After this patch: $ modinfo drivers/net/ethernet/qualcomm/emac/qcom-emac.ko | grep alias alias: platform:qcom-emac alias: of:N*T*Cqcom,fsm9900-emacC* alias: of:N*T*Cqcom,fsm9900-emac Signed-off-by: Javier Martinez Canillas Acked-by: Timur Tabi Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 9bf3b2b..4fede4b 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -575,6 +575,7 @@ static const struct of_device_id emac_dt_match[] = { }, {} }; +MODULE_DEVICE_TABLE(of, emac_dt_match); #if IS_ENABLED(CONFIG_ACPI) static const struct acpi_device_id emac_acpi_match[] = { -- cgit v0.10.2 From af40097e3eafc97f6f856c085360a0e696e1f319 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 17 Oct 2016 11:05:44 -0300 Subject: net: hisilicon: Fix hns_mdio module autoload for OF registration If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/net/ethernet/hisilicon//hns_mdio.ko | grep alias alias: platform:Hi-HNS_MDIO alias: acpi*:HISI0141:* After this patch: $ modinfo drivers/net/ethernet/hisilicon//hns_mdio.ko | grep alias alias: platform:Hi-HNS_MDIO alias: of:N*T*Chisilicon,hns-mdioC* alias: of:N*T*Chisilicon,hns-mdio alias: of:N*T*Chisilicon,mdioC* alias: of:N*T*Chisilicon,mdio alias: acpi*:HISI0141:* Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index 33f4c48..501eb20 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -563,6 +563,7 @@ static const struct of_device_id hns_mdio_match[] = { {.compatible = "hisilicon,hns-mdio"}, {} }; +MODULE_DEVICE_TABLE(of, hns_mdio_match); static const struct acpi_device_id hns_mdio_acpi_match[] = { { "HISI0141", 0 }, -- cgit v0.10.2 From 03eaae52537f4bbe2e565535f2aa38d80f92e994 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 17 Oct 2016 11:05:45 -0300 Subject: net: dsa: b53: Fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/net/dsa/b53/b53_mmap.ko | grep alias $ After this patch: $ modinfo drivers/net/dsa/b53/b53_mmap.ko | grep alias alias: of:N*T*Cbrcm,bcm63xx-switchC* alias: of:N*T*Cbrcm,bcm63xx-switch alias: of:N*T*Cbrcm,bcm6368-switchC* alias: of:N*T*Cbrcm,bcm6368-switch alias: of:N*T*Cbrcm,bcm6328-switchC* alias: of:N*T*Cbrcm,bcm6328-switch alias: of:N*T*Cbrcm,bcm3384-switchC* alias: of:N*T*Cbrcm,bcm3384-switch Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c index 76fb855..ef63d24 100644 --- a/drivers/net/dsa/b53/b53_mmap.c +++ b/drivers/net/dsa/b53/b53_mmap.c @@ -256,6 +256,7 @@ static const struct of_device_id b53_mmap_of_table[] = { { .compatible = "brcm,bcm63xx-switch" }, { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, b53_mmap_of_table); static struct platform_driver b53_mmap_driver = { .probe = b53_mmap_probe, -- cgit v0.10.2 From 0822b43e4f08af3619a5dbf59f5036872c3ea323 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 17 Oct 2016 11:05:46 -0300 Subject: net: dsa: bcm_sf2: Fix module autoload for OF registration If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/net/dsa/bcm_sf2.ko | grep alias alias: platform:brcm-sf2 After this patch: $ modinfo drivers/net/dsa/bcm_sf2.ko | grep alias alias: platform:brcm-sf2 alias: of:N*T*Cbrcm,bcm7445-switch-v4.0C* alias: of:N*T*Cbrcm,bcm7445-switch-v4.0 Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index e218887..0427009 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1158,6 +1158,7 @@ static const struct of_device_id bcm_sf2_of_match[] = { { .compatible = "brcm,bcm7445-switch-v4.0" }, { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, bcm_sf2_of_match); static struct platform_driver bcm_sf2_driver = { .probe = bcm_sf2_sw_probe, -- cgit v0.10.2 From 6bc80629eefc3731f5881092bed610d994e05763 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 18 Oct 2016 08:16:03 +0200 Subject: bnx2: fix locking when netconsole is used Functions bnx2_reg_rd_ind(), bnx2_reg_wr_ind() and bnx2_ctx_wr() can be called with IRQs disabled when netconsole is enabled. So they should use spin_{,un}lock_irq{save,restore} instead of _bh variants. Example call flow: bnx2_poll() ->bnx2_poll_link() ->bnx2_phy_int() ->bnx2_set_remote_link() ->bnx2_shmem_rd() ->bnx2_reg_rd_ind() -> spin_lock_bh(&bp->indirect_lock); spin_unlock_bh(&bp->indirect_lock); ... -> __local_bh_enable_ip static inline void __local_bh_enable_ip(unsigned long ip) WARN_ON_ONCE(in_irq() || irqs_disabled()); <<<<<< WARN Cc: Sony Chacko Cc: Dept-HSGLinuxNICDev@qlogic.com Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 27f11a5..b3791b3 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -271,22 +271,25 @@ static inline u32 bnx2_tx_avail(struct bnx2 *bp, struct bnx2_tx_ring_info *txr) static u32 bnx2_reg_rd_ind(struct bnx2 *bp, u32 offset) { + unsigned long flags; u32 val; - spin_lock_bh(&bp->indirect_lock); + spin_lock_irqsave(&bp->indirect_lock, flags); BNX2_WR(bp, BNX2_PCICFG_REG_WINDOW_ADDRESS, offset); val = BNX2_RD(bp, BNX2_PCICFG_REG_WINDOW); - spin_unlock_bh(&bp->indirect_lock); + spin_unlock_irqrestore(&bp->indirect_lock, flags); return val; } static void bnx2_reg_wr_ind(struct bnx2 *bp, u32 offset, u32 val) { - spin_lock_bh(&bp->indirect_lock); + unsigned long flags; + + spin_lock_irqsave(&bp->indirect_lock, flags); BNX2_WR(bp, BNX2_PCICFG_REG_WINDOW_ADDRESS, offset); BNX2_WR(bp, BNX2_PCICFG_REG_WINDOW, val); - spin_unlock_bh(&bp->indirect_lock); + spin_unlock_irqrestore(&bp->indirect_lock, flags); } static void @@ -304,8 +307,10 @@ bnx2_shmem_rd(struct bnx2 *bp, u32 offset) static void bnx2_ctx_wr(struct bnx2 *bp, u32 cid_addr, u32 offset, u32 val) { + unsigned long flags; + offset += cid_addr; - spin_lock_bh(&bp->indirect_lock); + spin_lock_irqsave(&bp->indirect_lock, flags); if (BNX2_CHIP(bp) == BNX2_CHIP_5709) { int i; @@ -322,7 +327,7 @@ bnx2_ctx_wr(struct bnx2 *bp, u32 cid_addr, u32 offset, u32 val) BNX2_WR(bp, BNX2_CTX_DATA_ADR, offset); BNX2_WR(bp, BNX2_CTX_DATA, val); } - spin_unlock_bh(&bp->indirect_lock); + spin_unlock_irqrestore(&bp->indirect_lock, flags); } #ifdef BCM_CNIC -- cgit v0.10.2 From 8d324fd9e1bee59274d67a0a08eac5f7b1573db7 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 18 Oct 2016 09:07:29 +0200 Subject: net/hsr: Remove unused but set variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unused but set variable master_dev in check_local_dest to fix the following GCC warning when building with 'W=1': net/hsr/hsr_forward.c: In function ‘check_local_dest’: net/hsr/hsr_forward.c:303:21: warning: variable ‘master_dev’ set but not used [-Wunused-but-set-variable] Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 5ee1d43..4ebe2aa 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -300,10 +300,6 @@ static void hsr_forward_do(struct hsr_frame_info *frame) static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb, struct hsr_frame_info *frame) { - struct net_device *master_dev; - - master_dev = hsr_port_get_hsr(hsr, HSR_PT_MASTER)->dev; - if (hsr_addr_is_self(hsr, eth_hdr(skb)->h_dest)) { frame->is_local_exclusive = true; skb->pkt_type = PACKET_HOST; -- cgit v0.10.2 From 902943c0d759a090490b2bf6c91a49395b353653 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 18 Oct 2016 09:20:33 +0200 Subject: dwc_eth_qos: do not clear pause flags from phy_device->supported phy_device->supported is originally set by the PHY driver. The ethernet driver should filter phy_device->supported to only contain flags supported by the IP. The IP supports setting rx and tx flow control independently, therefore SUPPORTED_Pause and SUPPORTED_Asym_Pause should not be cleared. If the flags are cleared, pause frames cannot be enabled (even if they are supported by the PHY). Signed-off-by: Niklas Cassel Signed-off-by: Jesper Nilsson Acked-by: Lars Persson Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index 0d00531..d775729 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -982,7 +982,8 @@ static int dwceqos_mii_probe(struct net_device *ndev) if (netif_msg_probe(lp)) phy_attached_info(phydev); - phydev->supported &= PHY_GBIT_FEATURES; + phydev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | + SUPPORTED_Asym_Pause; lp->link = 0; lp->speed = 0; -- cgit v0.10.2 From 1826277802d83b47d77e6a3e876aec07777aa271 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 18 Oct 2016 09:20:55 +0200 Subject: dwc_eth_qos: enable flow control by default Allow autoneg to enable flow control by default. The behavior when autoneg is off has not changed. Signed-off-by: Niklas Cassel Signed-off-by: Jesper Nilsson Acked-by: Lars Persson Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index d775729..5eedac4 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -988,6 +988,7 @@ static int dwceqos_mii_probe(struct net_device *ndev) lp->link = 0; lp->speed = 0; lp->duplex = DUPLEX_UNKNOWN; + lp->flowcontrol.autoneg = AUTONEG_ENABLE; return 0; } -- cgit v0.10.2 From 7e1670c15c9b18d614b43a674e086eb3e239c8f7 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 18 Oct 2016 09:40:20 +0200 Subject: ipv4: Remove unused but set variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unused but set variable dev in ip_do_fragment to fix the following GCC warning when building with 'W=1': net/ipv4/ip_output.c: In function ‘ip_do_fragment’: net/ipv4/ip_output.c:541:21: warning: variable ‘dev’ set but not used [-Wunused-but-set-variable] Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 05d1058..03e7f73 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -538,7 +538,6 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, { struct iphdr *iph; int ptr; - struct net_device *dev; struct sk_buff *skb2; unsigned int mtu, hlen, left, len, ll_rs; int offset; @@ -546,8 +545,6 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct rtable *rt = skb_rtable(skb); int err = 0; - dev = rt->dst.dev; - /* for offloaded checksums cleanup checksum before fragmentation */ if (skb->ip_summed == CHECKSUM_PARTIAL && (err = skb_checksum_help(skb))) -- cgit v0.10.2 From a56177e18f2e44499a8bf5bc03dbe896dbec657d Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Tue, 18 Oct 2016 14:21:25 +0530 Subject: cxgb4: Fix number of queue sets corssing the limit Do not let number of offload queue sets to go more than MAX_OFLD_QSETS, which would otherwise crash the driver on machines with cores more than MAX_OFLD_QSETS. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index f320497..57eb4e1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4057,7 +4057,7 @@ static void cfg_queues(struct adapter *adap) * capped by the number of available cores. */ if (n10g) { - i = num_online_cpus(); + i = min_t(int, MAX_OFLD_QSETS, num_online_cpus()); s->ofldqsets = roundup(i, adap->params.nports); } else { s->ofldqsets = adap->params.nports; -- cgit v0.10.2 From 7ab488951aa536d52feb3690d204a693edf4f433 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 18 Oct 2016 11:22:54 +0200 Subject: tcp: Remove unused but set variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unused but set variable icsk in listening_get_next to fix the following GCC warning when building with 'W=1': net/ipv4/tcp_ipv4.c: In function ‘listening_get_next’: net/ipv4/tcp_ipv4.c:1890:31: warning: variable ‘icsk’ set but not used [-Wunused-but-set-variable] Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bd5e8d1..79d55eb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1887,7 +1887,6 @@ static void *listening_get_next(struct seq_file *seq, void *cur) struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); struct inet_listen_hashbucket *ilb; - struct inet_connection_sock *icsk; struct sock *sk = cur; if (!sk) { @@ -1909,7 +1908,6 @@ get_sk: continue; if (sk->sk_family == st->family) return sk; - icsk = inet_csk(sk); } spin_unlock_bh(&ilb->lock); st->offset = 0; -- cgit v0.10.2 From d2e4d593516e877f1f6fb40031eb495f36606e16 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:05:30 +0200 Subject: netfilter: nf_tables: avoid uninitialized variable warning The newly added nft_range_eval() function handles the two possible nft range operations, but as the compiler warning points out, any unexpected value would lead to the 'mismatch' variable being used without being initialized: net/netfilter/nft_range.c: In function 'nft_range_eval': net/netfilter/nft_range.c:45:5: error: 'mismatch' may be used uninitialized in this function [-Werror=maybe-uninitialized] This removes the variable in question and instead moves the condition into the switch itself, which is potentially more efficient than adding a bogus 'default' clause as in my first approach, and is nicer than using the 'uninitialized_var' macro. Fixes: 0f3cd9b36977 ("netfilter: nf_tables: add range expression") Link: http://patchwork.ozlabs.org/patch/677114/ Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 9bc4586..fbc8800 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -28,22 +28,20 @@ static void nft_range_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { const struct nft_range_expr *priv = nft_expr_priv(expr); - bool mismatch; int d1, d2; d1 = memcmp(®s->data[priv->sreg], &priv->data_from, priv->len); d2 = memcmp(®s->data[priv->sreg], &priv->data_to, priv->len); switch (priv->op) { case NFT_RANGE_EQ: - mismatch = (d1 < 0 || d2 > 0); + if (d1 < 0 || d2 > 0) + regs->verdict.code = NFT_BREAK; break; case NFT_RANGE_NEQ: - mismatch = (d1 >= 0 && d2 <= 0); + if (d1 >= 0 && d2 <= 0) + regs->verdict.code = NFT_BREAK; break; } - - if (mismatch) - regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = { -- cgit v0.10.2 From 7cb3f9214dfa443c1ccc2be637dcc6344cc203f0 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 18 Oct 2016 18:09:48 +0200 Subject: bridge: multicast: restore perm router ports on multicast enable Satish reported a problem with the perm multicast router ports not getting reenabled after some series of events, in particular if it happens that the multicast snooping has been disabled and the port goes to disabled state then it will be deleted from the router port list, but if it moves into non-disabled state it will not be re-added because the mcast snooping is still disabled, and enabling snooping later does nothing. Here are the steps to reproduce, setup br0 with snooping enabled and eth1 added as a perm router (multicast_router = 2): 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show At this point we have mcast enabled and eth1 as a perm router (value = 2) but it is not in the router list which is incorrect. After this change: 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show router ports on br0: eth1 Note: we can directly do br_multicast_enable_port for all because the querier timer already has checks for the port state and will simply expire if it's in blocking/disabled. See the comment added by commit 9aa66382163e7 ("bridge: multicast: add a comment to br_port_state_selection about blocking state") Fixes: 561f1103a2b7 ("bridge: Add multicast_snooping sysfs toggle") Reported-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index c5fea93..2136e45 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -972,13 +972,12 @@ static void br_multicast_enable(struct bridge_mcast_own_query *query) mod_timer(&query->timer, jiffies); } -void br_multicast_enable_port(struct net_bridge_port *port) +static void __br_multicast_enable_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; - spin_lock(&br->multicast_lock); if (br->multicast_disabled || !netif_running(br->dev)) - goto out; + return; br_multicast_enable(&port->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) @@ -987,8 +986,14 @@ void br_multicast_enable_port(struct net_bridge_port *port) if (port->multicast_router == MDB_RTR_TYPE_PERM && hlist_unhashed(&port->rlist)) br_multicast_add_router(br, port); +} -out: +void br_multicast_enable_port(struct net_bridge_port *port) +{ + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + __br_multicast_enable_port(port); spin_unlock(&br->multicast_lock); } @@ -1994,8 +1999,9 @@ static void br_multicast_start_querier(struct net_bridge *br, int br_multicast_toggle(struct net_bridge *br, unsigned long val) { - int err = 0; struct net_bridge_mdb_htable *mdb; + struct net_bridge_port *port; + int err = 0; spin_lock_bh(&br->multicast_lock); if (br->multicast_disabled == !val) @@ -2023,10 +2029,9 @@ rollback: goto rollback; } - br_multicast_start_querier(br, &br->ip4_own_query); -#if IS_ENABLED(CONFIG_IPV6) - br_multicast_start_querier(br, &br->ip6_own_query); -#endif + br_multicast_open(br); + list_for_each_entry(port, &br->port_list, list) + __br_multicast_enable_port(port); unlock: spin_unlock_bh(&br->multicast_lock); -- cgit v0.10.2 From 9fa2f2cca21b14d17b1c1a37babb639a48a5a29f Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 17 Oct 2016 15:56:29 -0500 Subject: ibmvnic: Driver Version 1.0.1 Increment driver version to reflect features that have been added since release. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index bfc84c7..878e2c0 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -27,7 +27,7 @@ /**************************************************************************/ #define IBMVNIC_NAME "ibmvnic" -#define IBMVNIC_DRIVER_VERSION "1.0" +#define IBMVNIC_DRIVER_VERSION "1.0.1" #define IBMVNIC_INVALID_MAP -1 #define IBMVNIC_STATS_TIMEOUT 1 /* basic structures plus 100 2k buffers */ -- cgit v0.10.2 From 12608c260d2fe36746508cb4fa20b6e9a5f9c241 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 17 Oct 2016 15:28:09 -0500 Subject: ibmvnic: Fix GFP_KERNEL allocation in interrupt context Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index bfe17d9..928bf8a 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1190,7 +1190,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter if (!scrq) return NULL; - scrq->msgs = (union sub_crq *)__get_free_pages(GFP_KERNEL, 2); + scrq->msgs = (union sub_crq *)__get_free_pages(GFP_ATOMIC, 2); memset(scrq->msgs, 0, 4 * PAGE_SIZE); if (!scrq->msgs) { dev_warn(dev, "Couldn't allocate crq queue messages page\n"); -- cgit v0.10.2 From 87737f8810db445db171ca81ca4cc43bd5b067ce Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 17 Oct 2016 15:28:10 -0500 Subject: ibmvnic: Update MTU after device initialization It is possible for the MTU to be changed during the initialization process with the VNIC Server. Ensure that the net device is updated to reflect the new MTU. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 928bf8a..213162d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3654,6 +3654,7 @@ static void handle_crq_init_rsp(struct work_struct *work) goto task_failed; netdev->real_num_tx_queues = adapter->req_tx_queues; + netdev->mtu = adapter->req_mtu; if (adapter->failover) { adapter->failover = false; @@ -3792,6 +3793,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) } netdev->real_num_tx_queues = adapter->req_tx_queues; + netdev->mtu = adapter->req_mtu; rc = register_netdev(netdev); if (rc) { -- cgit v0.10.2 From 41ee9c557ef5de992843b6dac35a199e651525cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Oct 2016 14:22:48 -0700 Subject: soreuseport: do not export reuseport_add_sock() reuseport_add_sock() is not used from a module, no need to export it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index e92b759..9a1a352 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -129,7 +129,6 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2) return 0; } -EXPORT_SYMBOL(reuseport_add_sock); static void reuseport_free_rcu(struct rcu_head *head) { -- cgit v0.10.2 From 4b75ca5a7a9e01dda3ea70bc17b1826fd679af61 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:16:08 +0200 Subject: net: bcm63xx: avoid referencing uninitialized variable gcc found a reference to an uninitialized variable in the error handling of bcm_enet_open, introduced by a recent cleanup: drivers/net/ethernet/broadcom/bcm63xx_enet.c: In function 'bcm_enet_open' drivers/net/ethernet/broadcom/bcm63xx_enet.c:1129:2: warning: 'phydev' may be used uninitialized in this function [-Wmaybe-uninitialized] This makes the use of that variable conditional, so we only reference it here after it has been used before. Unlike my normal patches, I have not build-tested this one, as I don't currently have mips test in my randconfig setup. Fixes: 625eb8667d6f ("net: ethernet: broadcom: bcm63xx: use phydev from struct net_device") Cc: Philippe Reynes Reported-by: kbuild test robot Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index ae364c7..5370909 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1126,7 +1126,8 @@ out_freeirq: free_irq(dev->irq, dev); out_phy_disconnect: - phy_disconnect(phydev); + if (priv->has_phy) + phy_disconnect(phydev); return ret; } -- cgit v0.10.2 From 52ccd6318481a467d8ad54ea40f60c61e957d58f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:16:09 +0200 Subject: net/hyperv: avoid uninitialized variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hdr_offset variable is only if we deal with a TCP or UDP packet, but as the check surrounding its usage tests for skb_is_gso() instead, the compiler has no idea if the variable is initialized or not at that point: drivers/net/hyperv/netvsc_drv.c: In function ‘netvsc_start_xmit’: drivers/net/hyperv/netvsc_drv.c:494:42: error: ‘hdr_offset’ may be used uninitialized in this function [-Werror=maybe-uninitialized] This adds an additional check for the transport type, which tells the compiler that this path cannot happen. Since the get_net_transport_info() function should always be inlined here, I don't expect this to result in additional runtime checks. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f0919bd..5d6e75a 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -447,7 +447,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) * Setup the sendside checksum offload only if this is not a * GSO packet. */ - if (skb_is_gso(skb)) { + if ((net_trans_info & (INFO_TCP | INFO_UDP)) && skb_is_gso(skb)) { struct ndis_tcp_lso_info *lso_info; rndis_msg_size += NDIS_LSO_PPI_SIZE; -- cgit v0.10.2 From ecf244f753e09486707843f2b7b1874f33027038 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:16:15 +0200 Subject: rocker: fix maybe-uninitialized warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some rare configurations, we get a warning about the 'index' variable being used without an initialization: drivers/net/ethernet/rocker/rocker_ofdpa.c: In function ‘ofdpa_port_fib_ipv4.isra.16.constprop’: drivers/net/ethernet/rocker/rocker_ofdpa.c:2425:92: warning: ‘index’ may be used uninitialized in this function [-Wmaybe-uninitialized] This is a false positive, the logic is just a bit too complex for gcc to follow here. Moving the intialization of 'index' a little further down makes it clear to gcc that the function always returns an error if it is not initialized. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 431a608..4ca4613 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -1493,8 +1493,6 @@ static int ofdpa_port_ipv4_nh(struct ofdpa_port *ofdpa_port, spin_lock_irqsave(&ofdpa->neigh_tbl_lock, lock_flags); found = ofdpa_neigh_tbl_find(ofdpa, ip_addr); - if (found) - *index = found->index; updating = found && adding; removing = found && !adding; @@ -1508,9 +1506,11 @@ static int ofdpa_port_ipv4_nh(struct ofdpa_port *ofdpa_port, resolved = false; } else if (removing) { ofdpa_neigh_del(trans, found); + *index = found->index; } else if (updating) { ofdpa_neigh_update(found, trans, NULL, false); resolved = !is_zero_ether_addr(found->eth_dst); + *index = found->index; } else { err = -ENOENT; } -- cgit v0.10.2 From b4f0fd4baa90ecce798e0d26d1cce8f4457f2028 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 17 Oct 2016 15:17:51 +0000 Subject: qed: Use list_move_tail instead of list_del/list_add_tail Using list_move_tail() instead of list_del() + list_add_tail(). Signed-off-by: Wei Yongjun Acked-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index ce171a8..63e1a1b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -538,8 +538,7 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) if (!p_pkt) break; - list_del(&p_pkt->list_entry); - list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); + list_move_tail(&p_pkt->list_entry, &p_rx->free_descq); rx_buf_addr = p_pkt->rx_buf_addr; cookie = p_pkt->cookie; @@ -993,9 +992,8 @@ static void qed_ll2_post_rx_buffer_notify_fw(struct qed_hwfn *p_hwfn, p_posting_packet = list_first_entry(&p_rx->posting_descq, struct qed_ll2_rx_packet, list_entry); - list_del(&p_posting_packet->list_entry); - list_add_tail(&p_posting_packet->list_entry, - &p_rx->active_descq); + list_move_tail(&p_posting_packet->list_entry, + &p_rx->active_descq); b_notify_fw = true; } @@ -1186,8 +1184,7 @@ static void qed_ll2_tx_packet_notify(struct qed_hwfn *p_hwfn, if (!p_pkt) break; - list_del(&p_pkt->list_entry); - list_add_tail(&p_pkt->list_entry, &p_tx->active_descq); + list_move_tail(&p_pkt->list_entry, &p_tx->active_descq); } SET_FIELD(db_msg.params, CORE_DB_DATA_DEST, DB_DEST_XCM); -- cgit v0.10.2 From 3805a938a6c24284863732fd45cec5a04609a224 Mon Sep 17 00:00:00 2001 From: Eric Garver Date: Mon, 17 Oct 2016 16:30:12 -0400 Subject: flow_dissector: Check skb for VLAN only if skb specified. Fixes a panic when calling eth_get_headlen(). Noticed on i40e driver. Fixes: d5709f7ab776 ("flow_dissector: For stripped vlan, get vlan info from skb->vlan_tci") Signed-off-by: Eric Garver Reviewed-by: Jakub Sitnicki Acked-by: Amir Vadai Signed-off-by: David S. Miller diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1a7b80f..44e6ba9 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -247,12 +247,10 @@ ipv6: case htons(ETH_P_8021Q): { const struct vlan_hdr *vlan; - if (skb_vlan_tag_present(skb)) + if (skb && skb_vlan_tag_present(skb)) proto = skb->protocol; - if (!skb_vlan_tag_present(skb) || - proto == cpu_to_be16(ETH_P_8021Q) || - proto == cpu_to_be16(ETH_P_8021AD)) { + if (eth_type_vlan(proto)) { struct vlan_hdr _vlan; vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), -- cgit v0.10.2 From e4961b0768852d9eb7383e1a5df178eacb714656 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 19 Oct 2016 16:57:08 +0300 Subject: net: core: Correctly iterate over lower adjacency list Tamir reported the following trace when processing ARP requests received via a vlan device on top of a VLAN-aware bridge: NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [swapper/1:0] [...] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G W 4.8.0-rc7 #1 Hardware name: Mellanox Technologies Ltd. "MSN2100-CB2F"/"SA001017", BIOS 5.6.5 06/07/2016 task: ffff88017edfea40 task.stack: ffff88017ee10000 RIP: 0010:[] [] netdev_all_lower_get_next_rcu+0x33/0x60 [...] Call Trace: [] mlxsw_sp_port_lower_dev_hold+0x5a/0xa0 [mlxsw_spectrum] [] mlxsw_sp_router_netevent_event+0x80/0x150 [mlxsw_spectrum] [] notifier_call_chain+0x4a/0x70 [] atomic_notifier_call_chain+0x1a/0x20 [] call_netevent_notifiers+0x1b/0x20 [] neigh_update+0x306/0x740 [] neigh_event_ns+0x4e/0xb0 [] arp_process+0x66f/0x700 [] ? common_interrupt+0x8c/0x8c [] arp_rcv+0x139/0x1d0 [] ? vlan_do_receive+0xda/0x320 [] __netif_receive_skb_core+0x524/0xab0 [] ? dev_queue_xmit+0x10/0x20 [] ? br_forward_finish+0x3d/0xc0 [bridge] [] ? br_handle_vlan+0xf6/0x1b0 [bridge] [] __netif_receive_skb+0x18/0x60 [] netif_receive_skb_internal+0x40/0xb0 [] netif_receive_skb+0x1c/0x70 [] br_pass_frame_up+0xc6/0x160 [bridge] [] ? deliver_clone+0x37/0x50 [bridge] [] ? br_flood+0xcc/0x160 [bridge] [] br_handle_frame_finish+0x224/0x4f0 [bridge] [] br_handle_frame+0x174/0x300 [bridge] [] __netif_receive_skb_core+0x329/0xab0 [] ? find_next_bit+0x15/0x20 [] ? cpumask_next_and+0x32/0x50 [] ? load_balance+0x178/0x9b0 [] __netif_receive_skb+0x18/0x60 [] netif_receive_skb_internal+0x40/0xb0 [] netif_receive_skb+0x1c/0x70 [] mlxsw_sp_rx_listener_func+0x61/0xb0 [mlxsw_spectrum] [] mlxsw_core_skb_receive+0x187/0x200 [mlxsw_core] [] mlxsw_pci_cq_tasklet+0x63a/0x9b0 [mlxsw_pci] [] tasklet_action+0xf6/0x110 [] __do_softirq+0xf6/0x280 [] irq_exit+0xdf/0xf0 [] do_IRQ+0x54/0xd0 [] common_interrupt+0x8c/0x8c The problem is that netdev_all_lower_get_next_rcu() never advances the iterator, thereby causing the loop over the lower adjacency list to run forever. Fix this by advancing the iterator and avoid the infinite loop. Fixes: 7ce856aaaf13 ("mlxsw: spectrum: Add couple of lower device helper functions") Signed-off-by: Ido Schimmel Reported-by: Tamir Winetroub Reviewed-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 136ae6bb..465e128 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3877,7 +3877,7 @@ struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, ldev = netdev_all_lower_get_next(dev, &(iter))) #define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \ - for (iter = (dev)->all_adj_list.lower.next, \ + for (iter = &(dev)->all_adj_list.lower, \ ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \ ldev; \ ldev = netdev_all_lower_get_next_rcu(dev, &(iter))) diff --git a/net/core/dev.c b/net/core/dev.c index f1fe26f..b09ac57 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5511,10 +5511,14 @@ struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, { struct netdev_adjacent *lower; - lower = list_first_or_null_rcu(&dev->all_adj_list.lower, - struct netdev_adjacent, list); + lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); + + if (&lower->list == &dev->all_adj_list.lower) + return NULL; + + *iter = &lower->list; - return lower ? lower->dev : NULL; + return lower->dev; } EXPORT_SYMBOL(netdev_all_lower_get_next_rcu); -- cgit v0.10.2 From 97c242902c209e7d46e365335db5202634484dcb Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Oct 2016 18:50:23 +0200 Subject: switchdev: Execute bridge ndos only for bridge ports We recently got the following warning after setting up a vlan device on top of an offloaded bridge and executing 'bridge link': WARNING: CPU: 0 PID: 18566 at drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c:81 mlxsw_sp_port_orig_get.part.9+0x55/0x70 [mlxsw_spectrum] [...] CPU: 0 PID: 18566 Comm: bridge Not tainted 4.8.0-rc7 #1 Hardware name: Mellanox Technologies Ltd. Mellanox switch/Mellanox switch, BIOS 4.6.5 05/21/2015 0000000000000286 00000000e64ab94f ffff880406e6f8f0 ffffffff8135eaa3 0000000000000000 0000000000000000 ffff880406e6f930 ffffffff8108c43b 0000005106e6f988 ffff8803df398840 ffff880403c60108 ffff880406e6f990 Call Trace: [] dump_stack+0x63/0x90 [] __warn+0xcb/0xf0 [] warn_slowpath_null+0x1d/0x20 [] mlxsw_sp_port_orig_get.part.9+0x55/0x70 [mlxsw_spectrum] [] mlxsw_sp_port_attr_get+0xa5/0xb0 [mlxsw_spectrum] [] switchdev_port_attr_get+0x4f/0x140 [] switchdev_port_attr_get+0x100/0x140 [] switchdev_port_attr_get+0x100/0x140 [] switchdev_port_bridge_getlink+0x5b/0xc0 [] ? switchdev_port_fdb_dump+0x90/0x90 [] rtnl_bridge_getlink+0xe7/0x190 [] netlink_dump+0x122/0x290 [] __netlink_dump_start+0x15f/0x190 [] ? rtnl_bridge_dellink+0x230/0x230 [] rtnetlink_rcv_msg+0x1a6/0x220 [] ? __kmalloc_node_track_caller+0x208/0x2c0 [] ? rtnl_bridge_dellink+0x230/0x230 [] ? rtnl_newlink+0x890/0x890 [] netlink_rcv_skb+0xa4/0xc0 [] rtnetlink_rcv+0x28/0x30 [] netlink_unicast+0x18c/0x240 [] netlink_sendmsg+0x2fb/0x3a0 [] sock_sendmsg+0x38/0x50 [] SYSC_sendto+0x101/0x190 [] ? __sys_recvmsg+0x51/0x90 [] SyS_sendto+0xe/0x10 [] entry_SYSCALL_64_fastpath+0x1a/0xa4 The problem is that the 8021q module propagates the call to ndo_bridge_getlink() via switchdev ops, but the switch driver doesn't recognize the netdev, as it's not offloaded. While we can ignore calls being made to non-bridge ports inside the driver, a better fix would be to push this check up to the switchdev layer. Note that these ndos can be called for non-bridged netdev, but this only happens in certain PF drivers which don't call the corresponding switchdev functions anyway. Fixes: 99f44bb3527b ("mlxsw: spectrum: Enable L3 interfaces on top of bridge devices") Signed-off-by: Ido Schimmel Reported-by: Tamir Winetroub Tested-by: Tamir Winetroub Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 02beb35..3b95fe9 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -771,6 +771,9 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD; int err; + if (!netif_is_bridge_port(dev)) + return -EOPNOTSUPP; + err = switchdev_port_attr_get(dev, &attr); if (err && err != -EOPNOTSUPP) return err; @@ -926,6 +929,9 @@ int switchdev_port_bridge_setlink(struct net_device *dev, struct nlattr *afspec; int err = 0; + if (!netif_is_bridge_port(dev)) + return -EOPNOTSUPP; + protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); if (protinfo) { @@ -959,6 +965,9 @@ int switchdev_port_bridge_dellink(struct net_device *dev, { struct nlattr *afspec; + if (!netif_is_bridge_port(dev)) + return -EOPNOTSUPP; + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (afspec) -- cgit v0.10.2 From 85dda4e5b0ee1f5b4e8cc93d39e475006bc61ccd Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 18 Oct 2016 18:59:34 +0200 Subject: rtnetlink: Add rtnexthop offload flag to compare mask The offload flag is a status flag and should not be used by FIB semantics for comparison. Fixes: 37ed9493699c ("rtnetlink: add RTNH_F_EXTERNAL flag for fib offload") Signed-off-by: Jiri Pirko Reviewed-by: Andy Gospodarek Signed-off-by: David S. Miller diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 262f037..5a78be5 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -350,7 +350,7 @@ struct rtnexthop { #define RTNH_F_OFFLOAD 8 /* offloaded route */ #define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */ -#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN) +#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | RTNH_F_OFFLOAD) /* Macros to handle hexthops */ -- cgit v0.10.2 From 82454581d76ccb95535ba6898b39ac5baa123633 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 18 Oct 2016 13:24:07 -0700 Subject: tcp: do not export sysctl_tcp_low_latency Since commit b2fb4f54ecd4 ("tcp: uninline tcp_prequeue()") we no longer access sysctl_tcp_low_latency from a module. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 79d55eb..61b7be3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -86,7 +86,6 @@ int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; -EXPORT_SYMBOL(sysctl_tcp_low_latency); #ifdef CONFIG_TCP_MD5SIG static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, -- cgit v0.10.2 From 1ecc281ec2f52d715dfc8cb67b6820ea813b9e52 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Oct 2016 21:50:23 +0200 Subject: netfilter: x_tables: suppress kmemcheck warning Markus Trippelsdorf reports: WARNING: kmemcheck: Caught 64-bit read from uninitialized memory (ffff88001e605480) 4055601e0088ffff000000000000000090686d81ffffffff0000000000000000 u u u u u u u u u u u u u u u u i i i i i i i i u u u u u u u u ^ |RIP: 0010:[] [] nf_register_net_hook+0x51/0x160 [..] [] nf_register_net_hook+0x51/0x160 [] nf_register_net_hooks+0x3f/0xa0 [] ipt_register_table+0xe5/0x110 [..] This warning is harmless; we copy 'uninitialized' data from the hook ops but it will not be used. Long term the structures keeping run-time data should be disentangled from those only containing config-time data (such as where in the list to insert a hook), but thats -next material. Reported-by: Markus Trippelsdorf Suggested-by: Al Viro Signed-off-by: Florian Westphal Acked-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e0aa7c1..fc49774 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1513,7 +1513,7 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn) if (!num_hooks) return ERR_PTR(-EINVAL); - ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); + ops = kcalloc(num_hooks, sizeof(*ops), GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); -- cgit v0.10.2 From f61851f64b171a684f5a1fa78325756dbbaadadc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Wed, 19 Oct 2016 10:20:27 +0200 Subject: Bluetooth: Fix append max 11 bytes of name to scan rsp data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Append maximum of 10 + 1 bytes of name to scan response data. Complete name is appended only if exists and is <= 10 characters. Else append short name if exists or shorten complete name if not. This makes sure name is consistent across multiple advertising instances. Signed-off-by: Michał Narajowski Signed-off-by: Marcel Holtmann diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e228842..1015d9c 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -969,41 +969,38 @@ void __hci_req_enable_advertising(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); } -static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) +u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { - size_t complete_len; size_t short_len; - int max_len; - - max_len = HCI_MAX_AD_LENGTH - ad_len - 2; - complete_len = strlen(hdev->dev_name); - short_len = strlen(hdev->short_name); - - /* no space left for name */ - if (max_len < 1) - return ad_len; + size_t complete_len; - /* no name set */ - if (!complete_len) + /* no space left for name (+ NULL + type + len) */ + if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) return ad_len; - /* complete name fits and is eq to max short name len or smaller */ - if (complete_len <= max_len && - complete_len <= HCI_MAX_SHORT_NAME_LENGTH) { + /* use complete name if present and fits */ + complete_len = strlen(hdev->dev_name); + if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH) return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE, - hdev->dev_name, complete_len); - } + hdev->dev_name, complete_len + 1); - /* short name set and fits */ - if (short_len && short_len <= max_len) { + /* use short name if present */ + short_len = strlen(hdev->short_name); + if (short_len) return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, - hdev->short_name, short_len); - } + hdev->short_name, short_len + 1); - /* no short name set so shorten complete name */ - if (!short_len) { - return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, - hdev->dev_name, max_len); + /* use shortened full name if present, we already know that name + * is longer then HCI_MAX_SHORT_NAME_LENGTH + */ + if (complete_len) { + u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1]; + + memcpy(name, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH); + name[HCI_MAX_SHORT_NAME_LENGTH] = '\0'; + + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, name, + sizeof(name)); } return ad_len; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 6b06629..dde77bd 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -106,6 +106,8 @@ static inline void hci_update_background_scan(struct hci_dev *hdev) void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); +u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len); + static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, u8 data_len) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7360380..1fba2a0 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6017,7 +6017,15 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, return err; } -static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) +static u8 calculate_name_len(struct hci_dev *hdev) +{ + u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 3]; + + return append_local_name(hdev, buf, 0); +} + +static u8 tlv_data_max_len(struct hci_dev *hdev, u32 adv_flags, + bool is_adv_data) { u8 max_len = HCI_MAX_AD_LENGTH; @@ -6030,9 +6038,8 @@ static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) if (adv_flags & MGMT_ADV_FLAG_TX_POWER) max_len -= 3; } else { - /* at least 1 byte of name should fit in */ if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) - max_len -= 3; + max_len -= calculate_name_len(hdev); if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE)) max_len -= 4; @@ -6063,12 +6070,13 @@ static bool appearance_managed(u32 adv_flags) return adv_flags & MGMT_ADV_FLAG_APPEARANCE; } -static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) +static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, + u8 len, bool is_adv_data) { int i, cur_len; u8 max_len; - max_len = tlv_data_max_len(adv_flags, is_adv_data); + max_len = tlv_data_max_len(hdev, adv_flags, is_adv_data); if (len > max_len) return false; @@ -6215,8 +6223,8 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (!tlv_data_is_valid(flags, cp->data, cp->adv_data_len, true) || - !tlv_data_is_valid(flags, cp->data + cp->adv_data_len, + if (!tlv_data_is_valid(hdev, flags, cp->data, cp->adv_data_len, true) || + !tlv_data_is_valid(hdev, flags, cp->data + cp->adv_data_len, cp->scan_rsp_len, false)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); @@ -6429,8 +6437,8 @@ static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev, rp.instance = cp->instance; rp.flags = cp->flags; - rp.max_adv_data_len = tlv_data_max_len(flags, true); - rp.max_scan_rsp_len = tlv_data_max_len(flags, false); + rp.max_adv_data_len = tlv_data_max_len(hdev, flags, true); + rp.max_scan_rsp_len = tlv_data_max_len(hdev, flags, false); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); -- cgit v0.10.2 From c30a70d3ac60f3216e8d60d7746caad084a7eb46 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Wed, 19 Oct 2016 09:06:41 +0200 Subject: stmmac: fix and review the ptp registration. The commit commit 7086605a6ab5 ("stmmac: fix error check when init ptp") breaks the procedure added by the commit efee95f42b5d ("ptp_clock: future-proofing drivers against PTP subsystem becoming optional") So this patch tries to re-import the logic added by the latest commit above: it makes sense to have the stmmac_ptp_register as void function and, inside the main, the stmmac_init_ptp can fails in case of the capability cannot be supported by the HW. Signed-off-by: Giuseppe Cavallaro Cc: Alexandre TORGUE Cc: Rayagond Kokatanur Cc: Dan Carpenter Cc: Nicolas Pitre Acked-by: Nicolas Pitre Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 8dc9056..b15fc55 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -145,7 +145,7 @@ int stmmac_mdio_register(struct net_device *ndev); int stmmac_mdio_reset(struct mii_bus *mii); void stmmac_set_ethtool_ops(struct net_device *netdev); -int stmmac_ptp_register(struct stmmac_priv *priv); +void stmmac_ptp_register(struct stmmac_priv *priv); void stmmac_ptp_unregister(struct stmmac_priv *priv); int stmmac_resume(struct device *dev); int stmmac_suspend(struct device *dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6c85b61..48e71fa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -676,7 +676,9 @@ static int stmmac_init_ptp(struct stmmac_priv *priv) priv->hwts_tx_en = 0; priv->hwts_rx_en = 0; - return stmmac_ptp_register(priv); + stmmac_ptp_register(priv); + + return 0; } static void stmmac_release_ptp(struct stmmac_priv *priv) @@ -1710,7 +1712,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) if (init_ptp) { ret = stmmac_init_ptp(priv); if (ret) - netdev_warn(priv->dev, "PTP support cannot init.\n"); + netdev_warn(priv->dev, "fail to init PTP.\n"); } #ifdef CONFIG_DEBUG_FS diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 5d61fb2..1477471 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -177,7 +177,7 @@ static struct ptp_clock_info stmmac_ptp_clock_ops = { * Description: this function will register the ptp clock driver * to kernel. It also does some house keeping work. */ -int stmmac_ptp_register(struct stmmac_priv *priv) +void stmmac_ptp_register(struct stmmac_priv *priv) { spin_lock_init(&priv->ptp_lock); priv->ptp_clock_ops = stmmac_ptp_clock_ops; @@ -185,17 +185,10 @@ int stmmac_ptp_register(struct stmmac_priv *priv) priv->ptp_clock = ptp_clock_register(&priv->ptp_clock_ops, priv->device); if (IS_ERR(priv->ptp_clock)) { - int ret = PTR_ERR(priv->ptp_clock); - + netdev_err(priv->dev, "ptp_clock_register failed\n"); priv->ptp_clock = NULL; - return ret; - } - - spin_lock_init(&priv->ptp_lock); - - netdev_dbg(priv->dev, "Added PTP HW clock successfully\n"); - - return 0; + } else if (priv->ptp_clock) + netdev_info(priv->dev, "registered PTP clock\n"); } /** -- cgit v0.10.2 From 6add49fff9233a5c43011eb42b521257cbaa9bda Mon Sep 17 00:00:00 2001 From: Jacob Siverskog Date: Thu, 20 Oct 2016 09:05:09 +0200 Subject: Bluetooth: btwilink: Fix probe return value Probe functions should return 0 on success. This driver's probe returns the value returned by hci_register_dev(), which is the hci index. This works for systems with only one hci device (id = 0) but for systems where the btwilink device ends up with an id larger than 0, things will start to fall apart. Make the probe function return 0 on success. Signed-off-by: Jacob Siverskog Signed-off-by: Marcel Holtmann diff --git a/drivers/bluetooth/btwilink.c b/drivers/bluetooth/btwilink.c index ef51c9c..b6bb58c 100644 --- a/drivers/bluetooth/btwilink.c +++ b/drivers/bluetooth/btwilink.c @@ -310,7 +310,7 @@ static int bt_ti_probe(struct platform_device *pdev) BT_DBG("HCI device registered (hdev %p)", hdev); dev_set_drvdata(&pdev->dev, hst); - return err; + return 0; } static int bt_ti_remove(struct platform_device *pdev) -- cgit v0.10.2 From 19271c1a083282fbd568f3f214a6182e973626eb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 20 Oct 2016 16:05:42 +0200 Subject: mlxsw: spectrum_router: Use correct tree index for binding By a mistake, there is tree index 0 passed to RALTB. Should be MLXSW_SP_LPM_TREE_MIN. Fixes: b45f64d16d45 ("mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls") Reported-by: Yotam Gigi Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 78fc557d..1b3a2cb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1862,7 +1862,8 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) if (err) return err; - mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4, 0); + mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4, + MLXSW_SP_LPM_TREE_MIN); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); if (err) return err; -- cgit v0.10.2 From 37956d78b8f0c5560421f83d1a5e337ee81c685c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 20 Oct 2016 16:05:43 +0200 Subject: mlxsw: spectrum_router: Make mlxsw_sp_router_fib4_del return void and remove warn The function return value is not checked anywhere. Also, the warning causes huge slowdown when removing large number of FIB entries which were not offloaded, because of ordering issue. Ido's preparing a patchset to fix the ordering issue, but that is definitelly not net tree material. Fixes: b45f64d16d45 ("mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 1b3a2cb..f3d50d3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1820,19 +1820,17 @@ err_fib_entry_insert: return err; } -static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, - struct fib_entry_notifier_info *fen_info) +static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, + struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib_entry *fib_entry; if (mlxsw_sp->router.aborted) - return 0; + return; fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fen_info); - if (!fib_entry) { - dev_warn(mlxsw_sp->bus_info->dev, "Failed to find FIB4 entry being removed.\n"); - return -ENOENT; - } + if (!fib_entry) + return; if (fib_entry->ref_count == 1) { mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); @@ -1840,7 +1838,6 @@ static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, } mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); - return 0; } static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) -- cgit v0.10.2 From 7fb6a36bab6b0b158f93eb13faa1b440f8b26009 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Thu, 20 Oct 2016 16:05:44 +0200 Subject: mlxsw: switchx2: Fix ethernet port initialization When creating an ethernet port fails, we must move the port to disable, otherwise putting the port in switch partition 0 (ETH) or 1 (IB) will always fails. Fixes: 31557f0f9755 ("mlxsw: Introduce Mellanox SwitchX-2 ASIC support") Signed-off-by: Elad Raz Reviewed-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index c0c23e2..92bda87 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -1088,6 +1088,7 @@ err_port_stp_state_set: err_port_admin_status_set: err_port_mtu_set: err_port_speed_set: + mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT); err_port_swid_set: err_port_system_port_mapping_set: port_not_usable: -- cgit v0.10.2 From 164c971d1cc365dc85e5cf9a9302a91c0cf7f126 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 20 Oct 2016 16:05:45 +0200 Subject: mlxsw: pci: Fix reset wait for SwitchX2 SwitchX2 firmware does not implement reset done yet. Moreover, when busy-polled for ready magic, that slows down firmware and reset takes longer than the defined timeout, causing initialization to fail. So restore the previous behaviour and just sleep in this case. Fixes: 233fa44bd67a ("mlxsw: pci: Implement reset done check") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index e742bd4..912f71f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1838,11 +1838,17 @@ static const struct mlxsw_bus mlxsw_pci_bus = { .cmd_exec = mlxsw_pci_cmd_exec, }; -static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci) +static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci, + const struct pci_device_id *id) { unsigned long end; mlxsw_pci_write32(mlxsw_pci, SW_RESET, MLXSW_PCI_SW_RESET_RST_BIT); + if (id->device == PCI_DEVICE_ID_MELLANOX_SWITCHX2) { + msleep(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS); + return 0; + } + wmb(); /* reset needs to be written before we read control register */ end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS); do { @@ -1909,7 +1915,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mlxsw_pci->pdev = pdev; pci_set_drvdata(pdev, mlxsw_pci); - err = mlxsw_pci_sw_reset(mlxsw_pci); + err = mlxsw_pci_sw_reset(mlxsw_pci, id); if (err) { dev_err(&pdev->dev, "Software reset failed\n"); goto err_sw_reset; -- cgit v0.10.2 From 5712bf9c5c30ade3204016147d7b04bece6952d9 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 19 Oct 2016 17:42:39 +0300 Subject: net/sched: act_mirred: Use passed lastuse argument stats_update callback is called by NIC drivers doing hardware offloading of the mirred action. Lastuse is passed as argument to specify when the stats was actually last updated and is not always the current time. Fixes: 9798e6fe4f9b ('net: act_mirred: allow statistic updates from offloaded actions') Signed-off-by: Paul Blakey Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 667dc38..6b07fba 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -207,8 +207,11 @@ out: static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, u64 lastuse) { - tcf_lastuse_update(&a->tcfa_tm); + struct tcf_mirred *m = to_mirred(a); + struct tcf_t *tm = &m->tcf_tm; + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); + tm->lastuse = lastuse; } static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, -- cgit v0.10.2 From 7ba5c003db59a6f738d87d92b68d4a91cdf22f60 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Oct 2016 11:45:49 +1100 Subject: net/ncsi: Avoid if statements in ncsi_suspend_channel() There are several if/else statements in the state machine implemented by switch/case in ncsi_suspend_channel() to avoid duplicated code. It makes the code a bit hard to be understood. This drops if/else statements in ncsi_suspend_channel() to improve the code readability as Joel Stanley suggested. Also, it becomes easy to add more states in the state machine without affecting current code. No logical changes introduced by this. Suggested-by: Joel Stanley Signed-off-by: Gavin Shan Signed-off-by: David S. Miller diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 5e509e5..655d0d6 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -540,42 +540,60 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_suspend_select; /* Fall through */ case ncsi_dev_state_suspend_select: + ndp->pending_req_num = 1; + + nca.type = NCSI_PKT_CMD_SP; + nca.package = np->id; + nca.channel = NCSI_RESERVED_CHANNEL; + if (ndp->flags & NCSI_DEV_HWA) + nca.bytes[0] = 0; + else + nca.bytes[0] = 1; + + nd->state = ncsi_dev_state_suspend_dcnt; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + + break; case ncsi_dev_state_suspend_dcnt: + ndp->pending_req_num = 1; + + nca.type = NCSI_PKT_CMD_DCNT; + nca.package = np->id; + nca.channel = nc->id; + + nd->state = ncsi_dev_state_suspend_dc; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + + break; case ncsi_dev_state_suspend_dc: + ndp->pending_req_num = 1; + + nca.type = NCSI_PKT_CMD_DC; + nca.package = np->id; + nca.channel = nc->id; + nca.bytes[0] = 1; + + nd->state = ncsi_dev_state_suspend_deselect; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + + break; case ncsi_dev_state_suspend_deselect: ndp->pending_req_num = 1; - np = ndp->active_package; - nc = ndp->active_channel; + nca.type = NCSI_PKT_CMD_DP; nca.package = np->id; - if (nd->state == ncsi_dev_state_suspend_select) { - nca.type = NCSI_PKT_CMD_SP; - nca.channel = NCSI_RESERVED_CHANNEL; - if (ndp->flags & NCSI_DEV_HWA) - nca.bytes[0] = 0; - else - nca.bytes[0] = 1; - nd->state = ncsi_dev_state_suspend_dcnt; - } else if (nd->state == ncsi_dev_state_suspend_dcnt) { - nca.type = NCSI_PKT_CMD_DCNT; - nca.channel = nc->id; - nd->state = ncsi_dev_state_suspend_dc; - } else if (nd->state == ncsi_dev_state_suspend_dc) { - nca.type = NCSI_PKT_CMD_DC; - nca.channel = nc->id; - nca.bytes[0] = 1; - nd->state = ncsi_dev_state_suspend_deselect; - } else if (nd->state == ncsi_dev_state_suspend_deselect) { - nca.type = NCSI_PKT_CMD_DP; - nca.channel = NCSI_RESERVED_CHANNEL; - nd->state = ncsi_dev_state_suspend_done; - } + nca.channel = NCSI_RESERVED_CHANNEL; + nd->state = ncsi_dev_state_suspend_done; ret = ncsi_xmit_cmd(&nca); - if (ret) { - nd->state = ncsi_dev_state_functional; - return; - } + if (ret) + goto error; break; case ncsi_dev_state_suspend_done: @@ -589,6 +607,10 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n", nd->state); } + + return; +error: + nd->state = ncsi_dev_state_functional; } static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) -- cgit v0.10.2 From 008a424a24a904ed12c03b203f6f257bcaf12358 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Oct 2016 11:45:50 +1100 Subject: net/ncsi: Fix stale link state of inactive channels on failover The issue was found on BCM5718 which has two NCSI channels in one package: C0 and C1. Both of them are connected to different LANs, means they are in link-up state and C0 is chosen as the active one until resetting BCM5718 happens as below. Resetting BCM5718 results in LSC (Link State Change) AEN packet received on C0, meaning LSC AEN is missed on C1. When LSC AEN packet received on C0 to report link-down, it fails over to C1 because C1 is in link-up state as software can see. However, C1 is in link-down state in hardware. It means the link state is out of synchronization between hardware and software, resulting in inappropriate channel (C1) selected as active one. This resolves the issue by sending separate GLS (Get Link Status) commands to all channels in the package before trying to do failover. The last link states of all channels in the package are retrieved. With it, C0 (not C1) is selected as active one as expected. Signed-off-by: Gavin Shan Signed-off-by: David S. Miller diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 13290a7..eac4858 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -246,6 +246,7 @@ enum { ncsi_dev_state_config_gls, ncsi_dev_state_config_done, ncsi_dev_state_suspend_select = 0x0401, + ncsi_dev_state_suspend_gls, ncsi_dev_state_suspend_dcnt, ncsi_dev_state_suspend_dc, ncsi_dev_state_suspend_deselect, diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 655d0d6..4789f86 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -550,12 +550,38 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) else nca.bytes[0] = 1; - nd->state = ncsi_dev_state_suspend_dcnt; + /* To retrieve the last link states of channels in current + * package when current active channel needs fail over to + * another one. It means we will possibly select another + * channel as next active one. The link states of channels + * are most important factor of the selection. So we need + * accurate link states. Unfortunately, the link states on + * inactive channels can't be updated with LSC AEN in time. + */ + if (ndp->flags & NCSI_DEV_RESHUFFLE) + nd->state = ncsi_dev_state_suspend_gls; + else + nd->state = ncsi_dev_state_suspend_dcnt; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; break; + case ncsi_dev_state_suspend_gls: + ndp->pending_req_num = np->channel_num; + + nca.type = NCSI_PKT_CMD_GLS; + nca.package = np->id; + + nd->state = ncsi_dev_state_suspend_dcnt; + NCSI_FOR_EACH_CHANNEL(np, nc) { + nca.channel = nc->id; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + } + + break; case ncsi_dev_state_suspend_dcnt: ndp->pending_req_num = 1; -- cgit v0.10.2 From bbc7c01e95ceef4e23e343f8cbb6edca887121a5 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Oct 2016 11:45:51 +1100 Subject: net/ncsi: Choose hot channel as active one if necessary The issue was found on BCM5718 which has two NCSI channels in one package: C0 and C1. C0 is in link-up state while C1 is in link-down state. C0 is chosen as active channel until unplugging and plugging C0's cable: On unplugging C0's cable, LSC (Link State Change) AEN packet received on C0 to report link-down event. After that, C1 is chosen as active channel. LSC AEN for link-up event is lost on C0 when plugging C0's cable back. We lose the network even C0 is usable. This resolves the issue by recording the (hot) channel that was ever chosen as active one. The hot channel is chosen to be active one if none of available channels in link-up state. With this, C0 is still the active one after unplugging C0's cable. LSC AEN packet received on C0 when plugging its cable back. Signed-off-by: Gavin Shan Signed-off-by: David S. Miller diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index eac4858..1308a56 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -265,6 +265,7 @@ struct ncsi_dev_priv { #endif unsigned int package_num; /* Number of packages */ struct list_head packages; /* List of packages */ + struct ncsi_channel *hot_channel; /* Channel was ever active */ struct ncsi_request requests[256]; /* Request table */ unsigned int request_id; /* Last used request ID */ #define NCSI_REQ_START_IDX 1 diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 4789f86..a3bd5fa 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -645,6 +645,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) struct net_device *dev = nd->dev; struct ncsi_package *np = ndp->active_package; struct ncsi_channel *nc = ndp->active_channel; + struct ncsi_channel *hot_nc = NULL; struct ncsi_cmd_arg nca; unsigned char index; unsigned long flags; @@ -750,12 +751,20 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) break; case ncsi_dev_state_config_done: spin_lock_irqsave(&nc->lock, flags); - if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) + if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) { + hot_nc = nc; nc->state = NCSI_CHANNEL_ACTIVE; - else + } else { + hot_nc = NULL; nc->state = NCSI_CHANNEL_INACTIVE; + } spin_unlock_irqrestore(&nc->lock, flags); + /* Update the hot channel */ + spin_lock_irqsave(&ndp->lock, flags); + ndp->hot_channel = hot_nc; + spin_unlock_irqrestore(&ndp->lock, flags); + ncsi_start_channel_monitor(nc); ncsi_process_next_channel(ndp); break; @@ -773,10 +782,14 @@ error: static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) { struct ncsi_package *np; - struct ncsi_channel *nc, *found; + struct ncsi_channel *nc, *found, *hot_nc; struct ncsi_channel_mode *ncm; unsigned long flags; + spin_lock_irqsave(&ndp->lock, flags); + hot_nc = ndp->hot_channel; + spin_unlock_irqrestore(&ndp->lock, flags); + /* The search is done once an inactive channel with up * link is found. */ @@ -794,6 +807,9 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) if (!found) found = nc; + if (nc == hot_nc) + found = nc; + ncm = &nc->modes[NCSI_MODE_LINK]; if (ncm->data[2] & 0x1) { spin_unlock_irqrestore(&nc->lock, flags); -- cgit v0.10.2 From 22d8aa93d7ea169a349fb2f9dee5babc68f6a683 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Oct 2016 11:45:52 +1100 Subject: net/ncsi: Improve HNCDSC AEN handler This improves AEN handler for Host Network Controller Driver Status Change (HNCDSC): * The channel's lock should be hold when accessing its state. * Do failover when host driver isn't ready. * Configure channel when host driver becomes ready. Signed-off-by: Gavin Shan Signed-off-by: David S. Miller diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index b41a661..6898e72 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -141,23 +141,35 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp, return -ENODEV; /* If the channel is active one, we need reconfigure it */ + spin_lock_irqsave(&nc->lock, flags); ncm = &nc->modes[NCSI_MODE_LINK]; hncdsc = (struct ncsi_aen_hncdsc_pkt *)h; ncm->data[3] = ntohl(hncdsc->status); if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_ACTIVE || - (ncm->data[3] & 0x1)) + nc->state != NCSI_CHANNEL_ACTIVE) { + spin_unlock_irqrestore(&nc->lock, flags); return 0; + } - if (ndp->flags & NCSI_DEV_HWA) + spin_unlock_irqrestore(&nc->lock, flags); + if (!(ndp->flags & NCSI_DEV_HWA) && !(ncm->data[3] & 0x1)) ndp->flags |= NCSI_DEV_RESHUFFLE; /* If this channel is the active one and the link doesn't * work, we have to choose another channel to be active one. * The logic here is exactly similar to what we do when link * is down on the active channel. + * + * On the other hand, we need configure it when host driver + * state on the active channel becomes ready. */ ncsi_stop_channel_monitor(nc); + + spin_lock_irqsave(&nc->lock, flags); + nc->state = (ncm->data[3] & 0x1) ? NCSI_CHANNEL_INACTIVE : + NCSI_CHANNEL_ACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); + spin_lock_irqsave(&ndp->lock, flags); list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); -- cgit v0.10.2 From 8be0328e52dc2c8c6f61563c9ccc95ae4d63e9ce Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Thu, 20 Oct 2016 10:01:28 +0200 Subject: stmmac: display the descriptors if DES0 = 0 It makes sense to display the descriptors even if DES0 is zero. This helps for example in case of it is needed to dump rx write-back descriptors to get timestamp status. Signed-off-by: Giuseppe Cavallaro Cc: Alexandre TORGUE Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 4ec7397..a1b17cd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -347,10 +347,9 @@ static void dwmac4_display_ring(void *head, unsigned int size, bool rx) pr_info("%s descriptor ring:\n", rx ? "RX" : "TX"); for (i = 0; i < size; i++) { - if (p->des0) - pr_info("%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", - i, (unsigned int)virt_to_phys(p), - p->des0, p->des1, p->des2, p->des3); + pr_info("%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", + i, (unsigned int)virt_to_phys(p), + p->des0, p->des1, p->des2, p->des3); p++; } } -- cgit v0.10.2 From 7bb6615d395a7c130053728f3a64f6df6b2e1f18 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 18 Oct 2016 14:37:32 +0200 Subject: netfilter: conntrack: restart gc immediately if GC_MAX_EVICTS is reached When the maximum evictions number is reached, do not wait 5 seconds before the next run. CC: Florian Westphal Signed-off-by: Nicolas Dichtel Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ba6a1d4..df2f5a3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -983,7 +983,7 @@ static void gc_worker(struct work_struct *work) return; ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio >= 90) + if (ratio >= 90 || expired_count == GC_MAX_EVICTS) next_run = 0; gc_work->last_bucket = i; -- cgit v0.10.2 From 7034b566a4e7d550621c2dfafd380b77b3787cd9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 17 Oct 2016 18:05:32 +0100 Subject: netfilter: fix nf_queue handling nf_queue handling is broken since e3b37f11e6e4 ("netfilter: replace list_head with single linked list") for two reasons: 1) If the bypass flag is set on, there are no userspace listeners and we still have more hook entries to iterate over, then jump to the next hook. Otherwise accept the packet. On nf_reinject() path, the okfn() needs to be invoked. 2) We should not re-enter the same hook on packet reinjection. If the packet is accepted, we have to skip the current hook from where the packet was enqueued, otherwise the packets gets enqueued over and over again. This restores the previous list_for_each_entry_continue() behaviour happening from nf_iterate() that was dealing with these two cases. This patch introduces a new nf_queue() wrapper function so this fix becomes simpler. Fixes: e3b37f11e6e4 ("netfilter: replace list_head with single linked list") Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/core.c b/net/netfilter/core.c index fcb5d1d..004af03 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -361,16 +361,9 @@ next_hook: if (ret == 0) ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - int err; - - RCU_INIT_POINTER(state->hook_entries, entry); - err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); - if (err < 0) { - if (err == -ESRCH && - (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) - goto next_hook; - kfree_skb(skb); - } + ret = nf_queue(skb, state, &entry, verdict); + if (ret == 1 && entry) + goto next_hook; } return ret; } diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index e0adb59..9fdb655 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -18,7 +18,7 @@ unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, /* nf_queue.c */ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, - unsigned int queuenum); + struct nf_hook_entry **entryp, unsigned int verdict); void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry); int __init netfilter_queue_init(void); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 96964a0..8f08d75 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -107,13 +107,8 @@ void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry) rcu_read_unlock(); } -/* - * Any packet that leaves via this function must come back - * through nf_reinject(). - */ -int nf_queue(struct sk_buff *skb, - struct nf_hook_state *state, - unsigned int queuenum) +static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, + unsigned int queuenum) { int status = -ENOENT; struct nf_queue_entry *entry = NULL; @@ -161,6 +156,27 @@ err: return status; } +/* Packets leaving via this function must come back through nf_reinject(). */ +int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry **entryp, unsigned int verdict) +{ + struct nf_hook_entry *entry = *entryp; + int ret; + + RCU_INIT_POINTER(state->hook_entries, entry); + ret = __nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); + if (ret < 0) { + if (ret == -ESRCH && + (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) { + *entryp = rcu_dereference(entry->next); + return 1; + } + kfree_skb(skb); + } + + return 0; +} + void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { struct nf_hook_entry *hook_entry; @@ -187,26 +203,26 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) entry->state.thresh = INT_MIN; if (verdict == NF_ACCEPT) { - next_hook: - verdict = nf_iterate(skb, &entry->state, &hook_entry); + hook_entry = rcu_dereference(hook_entry->next); + if (hook_entry) +next_hook: + verdict = nf_iterate(skb, &entry->state, &hook_entry); } switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_STOP: +okfn: local_bh_disable(); entry->state.okfn(entry->state.net, entry->state.sk, skb); local_bh_enable(); break; case NF_QUEUE: - RCU_INIT_POINTER(entry->state.hook_entries, hook_entry); - err = nf_queue(skb, &entry->state, - verdict >> NF_VERDICT_QBITS); - if (err < 0) { - if (err == -ESRCH && - (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) + err = nf_queue(skb, &entry->state, &hook_entry, verdict); + if (err == 1) { + if (hook_entry) goto next_hook; - kfree_skb(skb); + goto okfn; } break; case NF_STOLEN: -- cgit v0.10.2 From 7aa8e63f0d0f2e0ae353632bca1ce75a258696c6 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 20 Oct 2016 12:29:26 +0200 Subject: ipv6: properly prevent temp_prefered_lft sysctl race The check for an underflow of tmp_prefered_lft is always false because tmp_prefered_lft is unsigned. The intention of the check was to guard against racing with an update of the temp_prefered_lft sysctl, potentially resulting in an underflow. As suggested by David Miller, the best way to prevent the race is by reading the sysctl variable using READ_ONCE. Signed-off-by: Jiri Bohac Reported-by: Julia Lawall Fixes: 76506a986dc3 ("IPv6: fix DESYNC_FACTOR") Signed-off-by: David S. Miller diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cc7c26d..060dd99 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1185,6 +1185,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i u32 addr_flags; unsigned long now = jiffies; long max_desync_factor; + s32 cnf_temp_preferred_lft; write_lock_bh(&idev->lock); if (ift) { @@ -1228,9 +1229,10 @@ retry: /* recalculate max_desync_factor each time and update * idev->desync_factor if it's larger */ + cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft); max_desync_factor = min_t(__u32, idev->cnf.max_desync_factor, - idev->cnf.temp_prefered_lft - regen_advance); + cnf_temp_preferred_lft - regen_advance); if (unlikely(idev->desync_factor > max_desync_factor)) { if (max_desync_factor > 0) { @@ -1245,11 +1247,8 @@ retry: tmp_valid_lft = min_t(__u32, ifp->valid_lft, idev->cnf.temp_valid_lft + age); - tmp_prefered_lft = idev->cnf.temp_prefered_lft + age - + tmp_prefered_lft = cnf_temp_preferred_lft + age - idev->desync_factor; - /* guard against underflow in case of concurrent updates to cnf */ - if (unlikely(tmp_prefered_lft < 0)) - tmp_prefered_lft = 0; tmp_prefered_lft = min_t(__u32, ifp->prefered_lft, tmp_prefered_lft); tmp_plen = ifp->prefix_len; tmp_tstamp = ifp->tstamp; -- cgit v0.10.2 From fcd91dd449867c6bfe56a81cabba76b829fd05cd Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 20 Oct 2016 15:58:02 +0200 Subject: net: add recursion limit to GRO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, GRO can do unlimited recursion through the gro_receive handlers. This was fixed for tunneling protocols by limiting tunnel GRO to one level with encap_mark, but both VLAN and TEB still have this problem. Thus, the kernel is vulnerable to a stack overflow, if we receive a packet composed entirely of VLAN headers. This patch adds a recursion counter to the GRO layer to prevent stack overflow. When a gro_receive function hits the recursion limit, GRO is aborted for this skb and it is processed normally. This recursion counter is put in the GRO CB, but could be turned into a percpu counter if we run out of space in the CB. Thanks to Vladimír Beneš for the initial bug report. Fixes: CVE-2016-7039 Fixes: 9b174d88c257 ("net: Add Transparent Ethernet Bridging GRO support.") Fixes: 66e5133f19e9 ("vlan: Add GRO support for non hardware accelerated vlan") Signed-off-by: Sabrina Dubroca Reviewed-by: Jiri Benc Acked-by: Hannes Frederic Sowa Acked-by: Tom Herbert Signed-off-by: David S. Miller diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 3c20e87..16af1ce 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -453,7 +453,7 @@ static struct sk_buff **geneve_gro_receive(struct sock *sk, skb_gro_pull(skb, gh_len); skb_gro_postpull_rcsum(skb, gh, gh_len); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); flush = 0; out_unlock: diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e7d1668..c1639a3 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -583,7 +583,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk, } } - pp = eth_gro_receive(head, skb); + pp = call_gro_receive(eth_gro_receive, head, skb); flush = 0; out: diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 465e128..91ee364 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2169,7 +2169,10 @@ struct napi_gro_cb { /* Used to determine if flush_id can be ignored */ u8 is_atomic:1; - /* 5 bit hole */ + /* Number of gro_receive callbacks this packet already went through */ + u8 recursion_counter:4; + + /* 1 bit hole */ /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2180,6 +2183,40 @@ struct napi_gro_cb { #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) +#define GRO_RECURSION_LIMIT 15 +static inline int gro_recursion_inc_test(struct sk_buff *skb) +{ + return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; +} + +typedef struct sk_buff **(*gro_receive_t)(struct sk_buff **, struct sk_buff *); +static inline struct sk_buff **call_gro_receive(gro_receive_t cb, + struct sk_buff **head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(head, skb); +} + +typedef struct sk_buff **(*gro_receive_sk_t)(struct sock *, struct sk_buff **, + struct sk_buff *); +static inline struct sk_buff **call_gro_receive_sk(gro_receive_sk_t cb, + struct sock *sk, + struct sk_buff **head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(sk, head, skb); +} + struct packet_type { __be16 type; /* This is really htons(ether_type). */ struct net_device *dev; /* NULL is wildcarded here */ diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 8de138d..f2531ad 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -664,7 +664,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*vhdr)); skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr)); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/core/dev.c b/net/core/dev.c index b09ac57..dbc8713 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4511,6 +4511,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->encap_mark = 0; + NAPI_GRO_CB(skb)->recursion_counter = 0; NAPI_GRO_CB(skb)->is_fou = 0; NAPI_GRO_CB(skb)->is_atomic = 1; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 66dff5e..02acfff 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -439,7 +439,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*eh)); skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1effc98..9648c97 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1391,7 +1391,7 @@ struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb) skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index cf50f7e..030d153 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -249,7 +249,7 @@ static struct sk_buff **fou_gro_receive(struct sock *sk, if (!ops || !ops->callbacks.gro_receive) goto out_unlock; - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); @@ -441,7 +441,7 @@ next_proto: if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive)) goto out_unlock; - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); flush = 0; out_unlock: diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 96e0efe..d5cac99 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -229,7 +229,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/ skb_gro_postpull_rcsum(skb, greh, grehlen); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); flush = 0; out_unlock: diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index f9333c9..b2be1d9 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -295,7 +295,7 @@ unflush: skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); - pp = udp_sk(sk)->gro_receive(sk, head, skb); + pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index e7bfd55..1fcf61f 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -246,7 +246,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, skb_gro_postpull_rcsum(skb, iph, nlen); - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); -- cgit v0.10.2 From 0d906b1e8d4002cdd59590fec630f4e75023e288 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 20 Oct 2016 17:13:53 +0200 Subject: bpf, test: fix ld_abs + vlan push/pop stress test After commit 636c2628086e ("net: skbuff: Remove errornous length validation in skb_vlan_pop()") mentioned test case stopped working, throwing a -12 (ENOMEM) return code. The issue however is not due to 636c2628086e, but rather due to a buggy test case that got uncovered from the change in behaviour in 636c2628086e. The data_size of that test case for the skb was set to 1. In the bpf_fill_ld_abs_vlan_push_pop() handler bpf insns are generated that loop with: reading skb data, pushing 68 tags, reading skb data, popping 68 tags, reading skb data, etc, in order to force a skb expansion and thus trigger that JITs recache skb->data. Problem is that initial data_size is too small. While before 636c2628086e, the test silently bailed out due to the skb->len < VLAN_ETH_HLEN check with returning 0, and now throwing an error from failing skb_ensure_writable(). Set at least minimum of ETH_HLEN as an initial length so that on first push of data, equivalent pop will succeed. Fixes: 4d9c5c53ac99 ("test_bpf: add bpf_skb_vlan_push/pop() tests") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 94346b4..0362da0 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -4831,7 +4831,7 @@ static struct bpf_test tests[] = { { }, INTERNAL, { 0x34 }, - { { 1, 0xbef } }, + { { ETH_HLEN, 0xbef } }, .fill_helper = bpf_fill_ld_abs_vlan_push_pop, }, /* -- cgit v0.10.2 From 2399d6143f85b155ae84ccd94237befd36b8f6c7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 20 Oct 2016 09:32:19 -0700 Subject: net: dsa: bcm_sf2: Prevent GPHY shutdown for kexec'd kernels For a kernel that is being kexec'd we re-enable the integrated GPHY in order for the subsequent MDIO bus scan to succeed and properly bind to the bcm7xxx PHY driver. If we did not do that, the GPHY would be shut down by the time the MDIO driver is probing the bus, and it would fail to read the correct PHY OUI and therefore bind to an appropriate PHY driver. Later on, this would cause DSA not to be able to successfully attach to the PHY, and the interface would not be created at all. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 0427009..077a245 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "bcm_sf2.h" #include "bcm_sf2_regs.h" @@ -1133,6 +1134,18 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev) return 0; } +static void bcm_sf2_sw_shutdown(struct platform_device *pdev) +{ + struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + + /* For a kernel about to be kexec'd we want to keep the GPHY on for a + * successful MDIO bus scan to occur. If we did turn off the GPHY + * before (e.g: port_disable), this will also power it back on. + */ + if (priv->hw_params.num_gphy == 1) + bcm_sf2_gphy_enable_set(priv->dev->ds, kexec_in_progress); +} + #ifdef CONFIG_PM_SLEEP static int bcm_sf2_suspend(struct device *dev) { @@ -1163,6 +1176,7 @@ MODULE_DEVICE_TABLE(of, bcm_sf2_of_match); static struct platform_driver bcm_sf2_driver = { .probe = bcm_sf2_sw_probe, .remove = bcm_sf2_sw_remove, + .shutdown = bcm_sf2_sw_shutdown, .driver = { .name = "brcm-sf2", .of_match_table = bcm_sf2_of_match, -- cgit v0.10.2 From 286c72deabaa240b7eebbd99496ed3324d69f3c0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Oct 2016 09:39:40 -0700 Subject: udp: must lock the socket in udp_disconnect() Baozeng Ding reported KASAN traces showing uses after free in udp_lib_get_port() and other related UDP functions. A CONFIG_DEBUG_PAGEALLOC=y kernel would eventually crash. I could write a reproducer with two threads doing : static int sock_fd; static void *thr1(void *arg) { for (;;) { connect(sock_fd, (const struct sockaddr *)arg, sizeof(struct sockaddr_in)); } } static void *thr2(void *arg) { struct sockaddr_in unspec; for (;;) { memset(&unspec, 0, sizeof(unspec)); connect(sock_fd, (const struct sockaddr *)&unspec, sizeof(unspec)); } } Problem is that udp_disconnect() could run without holding socket lock, and this was causing list corruptions. Signed-off-by: Eric Dumazet Reported-by: Baozeng Ding Signed-off-by: David S. Miller diff --git a/include/net/udp.h b/include/net/udp.h index ea53a87..4948790 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -258,6 +258,7 @@ void udp_flush_pending_frames(struct sock *sk); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); +int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait); struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 7cf7d6e..205e200 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -994,7 +994,7 @@ struct proto ping_prot = { .init = ping_init_sock, .close = ping_close, .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, .sendmsg = ping_v4_sendmsg, diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 90a85c9..ecbe5a7 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -918,7 +918,7 @@ struct proto raw_prot = { .close = raw_close, .destroy = raw_destroy, .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .ioctl = raw_ioctl, .init = raw_init, .setsockopt = raw_setsockopt, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7d96dc2..311613e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1345,7 +1345,7 @@ csum_copy_err: goto try_again; } -int udp_disconnect(struct sock *sk, int flags) +int __udp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); /* @@ -1367,6 +1367,15 @@ int udp_disconnect(struct sock *sk, int flags) sk_dst_reset(sk); return 0; } +EXPORT_SYMBOL(__udp_disconnect); + +int udp_disconnect(struct sock *sk, int flags) +{ + lock_sock(sk); + __udp_disconnect(sk, flags); + release_sock(sk); + return 0; +} EXPORT_SYMBOL(udp_disconnect); void udp_lib_unhash(struct sock *sk) @@ -2193,7 +2202,7 @@ int udp_abort(struct sock *sk, int err) sk->sk_err = err; sk->sk_error_report(sk); - udp_disconnect(sk, 0); + __udp_disconnect(sk, 0); release_sock(sk); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 0e983b6..66e2d9d 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -180,7 +180,7 @@ struct proto pingv6_prot = { .init = ping_init_sock, .close = ping_close, .connect = ip6_datagram_connect_v6_only, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .sendmsg = ping_v6_sendmsg, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 54404f0..054a1d8 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1241,7 +1241,7 @@ struct proto rawv6_prot = { .close = rawv6_close, .destroy = raw6_destroy, .connect = ip6_datagram_connect_v6_only, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .ioctl = rawv6_ioctl, .init = rawv6_init_sk, .setsockopt = rawv6_setsockopt, diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 42de4cc..fce25af 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -338,7 +338,7 @@ static int l2tp_ip_disconnect(struct sock *sk, int flags) if (sock_flag(sk, SOCK_ZAPPED)) return 0; - return udp_disconnect(sk, flags); + return __udp_disconnect(sk, flags); } static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ea2ae66..ad3468c 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -410,7 +410,7 @@ static int l2tp_ip6_disconnect(struct sock *sk, int flags) if (sock_flag(sk, SOCK_ZAPPED)) return 0; - return udp_disconnect(sk, flags); + return __udp_disconnect(sk, flags); } static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, -- cgit v0.10.2 From a681574c99be23e4d20b769bf0e543239c364af5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Oct 2016 10:26:48 -0700 Subject: ipv4: disable BH in set_ping_group_range() In commit 4ee3bd4a8c746 ("ipv4: disable BH when changing ip local port range") Cong added BH protection in set_local_port_range() but missed that same fix was needed in set_ping_group_range() Fixes: b8f1a55639e6 ("udp: Add function to make source port for UDP tunnels") Signed-off-by: Eric Dumazet Reported-by: Eric Salo Signed-off-by: David S. Miller diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1cb67de..500ae40 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -109,10 +109,10 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig kgid_t *data = table->data; struct net *net = container_of(table->data, struct net, ipv4.ping_group_range.range); - write_seqlock(&net->ipv4.ip_local_ports.lock); + write_seqlock_bh(&net->ipv4.ip_local_ports.lock); data[0] = low; data[1] = high; - write_sequnlock(&net->ipv4.ip_local_ports.lock); + write_sequnlock_bh(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ -- cgit v0.10.2 From 97dcaa0fcfd24daa9a36c212c1ad1d5a97759212 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 20 Oct 2016 18:15:16 -0700 Subject: kexec: Export kexec_in_progress to modules The bcm_sf2 driver uses kexec_in_progress to know whether it can power down an integrated PHY during shutdown, and can be built as a module. Other modules may be using this in the future, so export it. Fixes: 2399d6143f85 ("net: dsa: bcm_sf2: Prevent GPHY shutdown for kexec'd kernels") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 5616755..786ab85 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -59,6 +59,7 @@ size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); /* Flag to indicate we are going to kexec a new kernel */ bool kexec_in_progress = false; +EXPORT_SYMBOL_GPL(kexec_in_progress); /* Location of the reserved area for the crash kernel */ -- cgit v0.10.2 From 9799c50372b23ed774791bdb87d700f1286ee8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Thu, 6 Oct 2016 01:43:08 +0200 Subject: batman-adv: fix splat on disabling an interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As long as there is still a reference for a hard interface held, there might still be a forwarding packet relying on its attributes. Therefore avoid setting hard_iface->soft_iface to NULL when disabling a hard interface. This fixes the following, potential splat: batman_adv: bat0: Interface deactivated: eth1 batman_adv: bat0: Removing interface: eth1 cgroup: new mount options do not match the existing superblock, will be ignored batman_adv: bat0: Interface deactivated: eth3 batman_adv: bat0: Removing interface: eth3 ------------[ cut here ]------------ WARNING: CPU: 3 PID: 1986 at ./net/batman-adv/bat_iv_ogm.c:549 batadv_iv_send_outstanding_bat_ogm_packet+0x145/0x643 [batman_adv] Modules linked in: batman_adv(O-) <...> CPU: 3 PID: 1986 Comm: kworker/u8:2 Tainted: G W O 4.6.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 Workqueue: bat_events batadv_iv_send_outstanding_bat_ogm_packet [batman_adv] 0000000000000000 ffff88001d93bca0 ffffffff8126c26b 0000000000000000 0000000000000000 ffff88001d93bcf0 ffffffff81051615 ffff88001f19f818 000002251d93bd68 0000000000000046 ffff88001dc04a00 ffff88001becbe48 Call Trace: [] dump_stack+0x67/0x90 [] __warn+0xc7/0xe5 [] warn_slowpath_null+0x18/0x1a [] batadv_iv_send_outstanding_bat_ogm_packet+0x145/0x643 [batman_adv] [] ? __lock_is_held+0x32/0x54 [] process_one_work+0x2a8/0x4f5 [] ? process_one_work+0x15c/0x4f5 [] worker_thread+0x1d5/0x2c0 [] ? process_scheduled_works+0x2e/0x2e [] ? process_scheduled_works+0x2e/0x2e [] kthread+0xc0/0xc8 [] ret_from_fork+0x22/0x40 [] ? __init_kthread_worker+0x55/0x55 ---[ end trace 647f9f325123dc05 ]--- What happened here is, that there was still a forw_packet (here: a BATMAN IV OGM) in the queue of eth3 with the forw_packet->if_incoming set to eth1 and the forw_packet->if_outgoing set to eth3. When eth3 is to be deactivated and removed, then this thread waits for the forw_packet queued on eth3 to finish. Because eth1 was deactivated and removed earlier and by that had forw_packet->if_incoming->soft_iface, set to NULL, the splat when trying to send/flush the OGM on eth3 occures. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Linus Lüssing [sven@narfation.org: Reduced size of Oops message] Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 08ce361..e034afb 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -652,7 +652,6 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_softif_destroy_sysfs(hard_iface->soft_iface); } - hard_iface->soft_iface = NULL; batadv_hardif_put(hard_iface); out: -- cgit v0.10.2 From 8651be8f14a12d24f203f283601d9b0418c389ff Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 19 Oct 2016 23:35:12 -0700 Subject: ipv6: fix a potential deadlock in do_ipv6_setsockopt() Baozeng reported this deadlock case: CPU0 CPU1 ---- ---- lock([ 165.136033] sk_lock-AF_INET6); lock([ 165.136033] rtnl_mutex); lock([ 165.136033] sk_lock-AF_INET6); lock([ 165.136033] rtnl_mutex); Similar to commit 87e9f0315952 ("ipv4: fix a potential deadlock in mcast getsockopt() path") this is due to we still have a case, ipv6_sock_mc_close(), where we acquire sk_lock before rtnl_lock. Close this deadlock with the similar solution, that is always acquire rtnl lock first. Fixes: baf606d9c9b1 ("ipv4,ipv6: grab rtnl before locking the socket") Reported-by: Baozeng Ding Tested-by: Baozeng Ding Cc: Marcelo Ricardo Leitner Signed-off-by: Cong Wang Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f2d0727..8f998af 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -174,6 +174,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); +void __ipv6_sock_mc_close(struct sock *sk); void ipv6_sock_mc_close(struct sock *sk); bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, const struct in6_addr *src_addr); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 5330262..636ec56 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -120,6 +120,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, static bool setsockopt_needs_rtnl(int optname) { switch (optname) { + case IPV6_ADDRFORM: case IPV6_ADD_MEMBERSHIP: case IPV6_DROP_MEMBERSHIP: case IPV6_JOIN_ANYCAST: @@ -198,7 +199,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } fl6_free_socklist(sk); - ipv6_sock_mc_close(sk); + __ipv6_sock_mc_close(sk); /* * Sock is moving from IPv6 to IPv4 (sk_prot), so diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 75c1fc5..14a3903 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -276,16 +276,14 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, return idev; } -void ipv6_sock_mc_close(struct sock *sk) +void __ipv6_sock_mc_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst; struct net *net = sock_net(sk); - if (!rcu_access_pointer(np->ipv6_mc_list)) - return; + ASSERT_RTNL(); - rtnl_lock(); while ((mc_lst = rtnl_dereference(np->ipv6_mc_list)) != NULL) { struct net_device *dev; @@ -303,8 +301,17 @@ void ipv6_sock_mc_close(struct sock *sk) atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); kfree_rcu(mc_lst, rcu); - } +} + +void ipv6_sock_mc_close(struct sock *sk) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + + if (!rcu_access_pointer(np->ipv6_mc_list)) + return; + rtnl_lock(); + __ipv6_sock_mc_close(sk); rtnl_unlock(); } -- cgit v0.10.2 From 2a73306b6096fafd5c2ae06ded1f92bbacb39df2 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 21 Oct 2016 21:49:14 -0400 Subject: netns: revert "netns: avoid disabling irq for netns id" This reverts commit bc51dddf98c9 ("netns: avoid disabling irq for netns id") as it was found to cause problems with systems running SELinux/audit, see the mailing list thread below: * http://marc.info/?t=147694653900002&r=1&w=2 Eventually we should be able to reintroduce this code once we have rewritten the audit multicast code to queue messages much the same way we do for unicast messages. A tracking issue for this can be found below: * https://github.com/linux-audit/audit-kernel/issues/23 Reported-by: Stephen Smalley Reported-by: Elad Raz Cc: Cong Wang Signed-off-by: Paul Moore Signed-off-by: David S. Miller diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 989434f..f61c0e0 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -215,13 +215,14 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id); */ int peernet2id_alloc(struct net *net, struct net *peer) { + unsigned long flags; bool alloc; int id; - spin_lock_bh(&net->nsid_lock); + spin_lock_irqsave(&net->nsid_lock, flags); alloc = atomic_read(&peer->count) == 0 ? false : true; id = __peernet2id_alloc(net, peer, &alloc); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); if (alloc && id >= 0) rtnl_net_notifyid(net, RTM_NEWNSID, id); return id; @@ -230,11 +231,12 @@ int peernet2id_alloc(struct net *net, struct net *peer) /* This function returns, if assigned, the id of a peer netns. */ int peernet2id(struct net *net, struct net *peer) { + unsigned long flags; int id; - spin_lock_bh(&net->nsid_lock); + spin_lock_irqsave(&net->nsid_lock, flags); id = __peernet2id(net, peer); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); return id; } EXPORT_SYMBOL(peernet2id); @@ -249,17 +251,18 @@ bool peernet_has_id(struct net *net, struct net *peer) struct net *get_net_ns_by_id(struct net *net, int id) { + unsigned long flags; struct net *peer; if (id < 0) return NULL; rcu_read_lock(); - spin_lock_bh(&net->nsid_lock); + spin_lock_irqsave(&net->nsid_lock, flags); peer = idr_find(&net->netns_ids, id); if (peer) get_net(peer); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); rcu_read_unlock(); return peer; @@ -422,17 +425,17 @@ static void cleanup_net(struct work_struct *work) for_each_net(tmp) { int id; - spin_lock_bh(&tmp->nsid_lock); + spin_lock_irq(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); - spin_unlock_bh(&tmp->nsid_lock); + spin_unlock_irq(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id); } - spin_lock_bh(&net->nsid_lock); + spin_lock_irq(&net->nsid_lock); idr_destroy(&net->netns_ids); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irq(&net->nsid_lock); } rtnl_unlock(); @@ -561,6 +564,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; + unsigned long flags; struct net *peer; int nsid, err; @@ -581,15 +585,15 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) if (IS_ERR(peer)) return PTR_ERR(peer); - spin_lock_bh(&net->nsid_lock); + spin_lock_irqsave(&net->nsid_lock, flags); if (__peernet2id(net, peer) >= 0) { - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); err = -EEXIST; goto out; } err = alloc_netid(net, peer, nsid); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err); err = 0; @@ -711,10 +715,11 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) .idx = 0, .s_idx = cb->args[0], }; + unsigned long flags; - spin_lock_bh(&net->nsid_lock); + spin_lock_irqsave(&net->nsid_lock, flags); idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); cb->args[0] = net_cb.idx; return skb->len; -- cgit v0.10.2 From 0ce66d4f70d3bcfb67c89f473e76e7cf0a01288f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 21 Oct 2016 14:21:55 -0700 Subject: Revert "kexec: Export kexec_in_progress to modules" This reverts commit 97dcaa0fcfd24daa9a36c212c1ad1d5a97759212. Based on the review discussion with Eric, we will come up with a different fix for the bcm_sf2 driver which does not make it rely on the kexec_in_progress value. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 786ab85..5616755 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -59,7 +59,6 @@ size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); /* Flag to indicate we are going to kexec a new kernel */ bool kexec_in_progress = false; -EXPORT_SYMBOL_GPL(kexec_in_progress); /* Location of the reserved area for the crash kernel */ -- cgit v0.10.2 From 4a2947e32ea9ca2bc36d68022b7d7b1014dca18f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 21 Oct 2016 14:21:56 -0700 Subject: net: dsa: bcm_sf2: Do not rely on kexec_in_progress After discussing with Eric, it turns out that, while using kexec_in_progress is a nice optimization, which prevents us from always powering on the integrated PHY, let's just turn it on in the shutdown path. This removes a dependency on kexec_in_progress which, according to Eric should not be used by modules Fixes: 2399d6143f85 ("net: dsa: bcm_sf2: Prevent GPHY shutdown for kexec'd kernels") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 077a245..e3ee27c 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -30,7 +30,6 @@ #include #include #include -#include #include "bcm_sf2.h" #include "bcm_sf2_regs.h" @@ -1141,9 +1140,11 @@ static void bcm_sf2_sw_shutdown(struct platform_device *pdev) /* For a kernel about to be kexec'd we want to keep the GPHY on for a * successful MDIO bus scan to occur. If we did turn off the GPHY * before (e.g: port_disable), this will also power it back on. + * + * Do not rely on kexec_in_progress, just power the PHY on. */ if (priv->hw_params.num_gphy == 1) - bcm_sf2_gphy_enable_set(priv->dev->ds, kexec_in_progress); + bcm_sf2_gphy_enable_set(priv->dev->ds, true); } #ifdef CONFIG_PM_SLEEP -- cgit v0.10.2 From 396a30cce15d084b2b1a395aa6d515c3d559c674 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 20 Oct 2016 14:19:46 -0700 Subject: ipv4: use the right lock for ping_group_range This reverts commit a681574c99be23e4d20b769bf0e543239c364af5 ("ipv4: disable BH in set_ping_group_range()") because we never read ping_group_range in BH context (unlike local_port_range). Then, since we already have a lock for ping_group_range, those using ip_local_ports.lock for ping_group_range are clearly typos. We might consider to share a same lock for both ping_group_range and local_port_range w.r.t. space saving, but that should be for net-next. Fixes: a681574c99be ("ipv4: disable BH in set_ping_group_range()") Fixes: ba6b918ab234 ("ping: move ping_group_range out of CONFIG_SYSCTL") Cc: Eric Dumazet Cc: Eric Salo Signed-off-by: Cong Wang Signed-off-by: David S. Miller diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 500ae40..80bc36b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -96,11 +96,11 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low container_of(table->data, struct net, ipv4.ping_group_range.range); unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ping_group_range.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq)); } /* Update system visible IP port range */ @@ -109,10 +109,10 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig kgid_t *data = table->data; struct net *net = container_of(table->data, struct net, ipv4.ping_group_range.range); - write_seqlock_bh(&net->ipv4.ip_local_ports.lock); + write_seqlock(&net->ipv4.ping_group_range.lock); data[0] = low; data[1] = high; - write_sequnlock_bh(&net->ipv4.ip_local_ports.lock); + write_sequnlock(&net->ipv4.ping_group_range.lock); } /* Validate changes from /proc interface. */ -- cgit v0.10.2 From bdc8cbd34d5d9d82c3db4df6a182b0530040062d Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Fri, 21 Oct 2016 04:43:39 -0400 Subject: qede: get_channels() need to populate max tx/rx coalesce values Recent changes in kernel ethtool implementation requires the driver callback for get_channels() has to populate the values for max tx/rx coalesce fields. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 25a9b29..16a1a47 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -756,6 +756,8 @@ static void qede_get_channels(struct net_device *dev, struct qede_dev *edev = netdev_priv(dev); channels->max_combined = QEDE_MAX_RSS_CNT(edev); + channels->max_rx = QEDE_MAX_RSS_CNT(edev); + channels->max_tx = QEDE_MAX_RSS_CNT(edev); channels->combined_count = QEDE_QUEUE_CNT(edev) - edev->fp_num_tx - edev->fp_num_rx; channels->tx_count = edev->fp_num_tx; -- cgit v0.10.2 From ba300ce35123a9ba8ea55318b0f579cc1ab3f51a Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Fri, 21 Oct 2016 04:43:40 -0400 Subject: qede: Do not allow RSS config for 100G devices RSS configuration is not supported for 100G adapters. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 16a1a47..bf04fb9 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1055,6 +1055,12 @@ static int qede_set_rxfh(struct net_device *dev, const u32 *indir, struct qede_dev *edev = netdev_priv(dev); int i; + if (edev->dev_info.common.num_hwfns > 1) { + DP_INFO(edev, + "RSS configuration is not supported for 100G devices\n"); + return -EOPNOTSUPP; + } + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; -- cgit v0.10.2 From 837d4eb6ed7cb0341079fac97e3037df6bef7482 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Fri, 21 Oct 2016 04:43:41 -0400 Subject: qede: Loopback implementation should ignore the normal traffic During the execution of loopback test, driver may receive the packets which are not originated by this test, loopback implementation need to skip those packets. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 28c0e9f..d6a4a9a5 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -348,6 +348,7 @@ bool qede_has_rx_work(struct qede_rx_queue *rxq); int qede_txq_has_work(struct qede_tx_queue *txq); void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, struct qede_dev *edev, u8 count); +void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq); #define RX_RING_SIZE_POW 13 #define RX_RING_SIZE ((u16)BIT(RX_RING_SIZE_POW)) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index bf04fb9..7da184c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1207,8 +1207,8 @@ static int qede_selftest_receive_traffic(struct qede_dev *edev) struct qede_rx_queue *rxq = NULL; struct sw_rx_data *sw_rx_data; union eth_rx_cqe *cqe; + int i, rc = 0; u8 *data_ptr; - int i; for_each_queue(i) { if (edev->fp_array[i].type & QEDE_FASTPATH_RX) { @@ -1227,46 +1227,60 @@ static int qede_selftest_receive_traffic(struct qede_dev *edev) * queue and that the loopback traffic is not IP. */ for (i = 0; i < QEDE_SELFTEST_POLL_COUNT; i++) { - if (qede_has_rx_work(rxq)) + if (!qede_has_rx_work(rxq)) { + usleep_range(100, 200); + continue; + } + + hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr); + sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring); + + /* Memory barrier to prevent the CPU from doing speculative + * reads of CQE/BD before reading hw_comp_cons. If the CQE is + * read before it is written by FW, then FW writes CQE and SB, + * and then the CPU reads the hw_comp_cons, it will use an old + * CQE. + */ + rmb(); + + /* Get the CQE from the completion ring */ + cqe = (union eth_rx_cqe *)qed_chain_consume(&rxq->rx_comp_ring); + + /* Get the data from the SW ring */ + sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS_MAX; + sw_rx_data = &rxq->sw_rx_ring[sw_rx_index]; + fp_cqe = &cqe->fast_path_regular; + len = le16_to_cpu(fp_cqe->len_on_first_bd); + data_ptr = (u8 *)(page_address(sw_rx_data->data) + + fp_cqe->placement_offset + + sw_rx_data->page_offset); + if (ether_addr_equal(data_ptr, edev->ndev->dev_addr) && + ether_addr_equal(data_ptr + ETH_ALEN, + edev->ndev->dev_addr)) { + for (i = ETH_HLEN; i < len; i++) + if (data_ptr[i] != (unsigned char)(i & 0xff)) { + rc = -1; + break; + } + + qede_recycle_rx_bd_ring(rxq, edev, 1); + qed_chain_recycle_consumed(&rxq->rx_comp_ring); break; - usleep_range(100, 200); + } + + DP_INFO(edev, "Not the transmitted packet\n"); + qede_recycle_rx_bd_ring(rxq, edev, 1); + qed_chain_recycle_consumed(&rxq->rx_comp_ring); } - if (!qede_has_rx_work(rxq)) { + if (i == QEDE_SELFTEST_POLL_COUNT) { DP_NOTICE(edev, "Failed to receive the traffic\n"); return -1; } - hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr); - sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring); - - /* Memory barrier to prevent the CPU from doing speculative reads of CQE - * / BD before reading hw_comp_cons. If the CQE is read before it is - * written by FW, then FW writes CQE and SB, and then the CPU reads the - * hw_comp_cons, it will use an old CQE. - */ - rmb(); - - /* Get the CQE from the completion ring */ - cqe = (union eth_rx_cqe *)qed_chain_consume(&rxq->rx_comp_ring); - - /* Get the data from the SW ring */ - sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS_MAX; - sw_rx_data = &rxq->sw_rx_ring[sw_rx_index]; - fp_cqe = &cqe->fast_path_regular; - len = le16_to_cpu(fp_cqe->len_on_first_bd); - data_ptr = (u8 *)(page_address(sw_rx_data->data) + - fp_cqe->placement_offset + sw_rx_data->page_offset); - for (i = ETH_HLEN; i < len; i++) - if (data_ptr[i] != (unsigned char)(i & 0xff)) { - DP_NOTICE(edev, "Loopback test failed\n"); - qede_recycle_rx_bd_ring(rxq, edev, 1); - return -1; - } - - qede_recycle_rx_bd_ring(rxq, edev, 1); + qede_update_rx_prod(edev, rxq); - return 0; + return rc; } static int qede_selftest_run_loopback(struct qede_dev *edev, u32 loopback_mode) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 343038c..da8ef69 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -943,8 +943,7 @@ static inline int qede_realloc_rx_buffer(struct qede_dev *edev, return 0; } -static inline void qede_update_rx_prod(struct qede_dev *edev, - struct qede_rx_queue *rxq) +void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq) { u16 bd_prod = qed_chain_get_prod_idx(&rxq->rx_bd_ring); u16 cqe_prod = qed_chain_get_prod_idx(&rxq->rx_comp_ring); -- cgit v0.10.2 From 0e191827383a6503a3bc547e63c74ff093f450f5 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Fri, 21 Oct 2016 04:43:42 -0400 Subject: qed*: Reduce the memory footprint for Rx path With the current default values for Rx path i.e., 8 queues of 8Kb entries each with 4Kb size, interface will consume 256Mb for Rx. The default values causing the driver probe to fail when the system memory is low. Based on the perforamnce results, rx-ring count value of 1Kb gives the comparable performance with Rx coalesce timeout of 12 seconds. Updating the default values. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 8dc3f46..c418360 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -878,6 +878,7 @@ static int qed_slowpath_start(struct qed_dev *cdev, } } + cdev->rx_coalesce_usecs = QED_DEFAULT_RX_USECS; rc = qed_nic_setup(cdev); if (rc) goto err; diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index d6a4a9a5..974689a 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -354,7 +354,7 @@ void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq); #define RX_RING_SIZE ((u16)BIT(RX_RING_SIZE_POW)) #define NUM_RX_BDS_MAX (RX_RING_SIZE - 1) #define NUM_RX_BDS_MIN 128 -#define NUM_RX_BDS_DEF NUM_RX_BDS_MAX +#define NUM_RX_BDS_DEF ((u16)BIT(10) - 1) #define TX_RING_SIZE_POW 13 #define TX_RING_SIZE ((u16)BIT(TX_RING_SIZE_POW)) diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index f9ae903..8978a60 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -146,6 +146,7 @@ enum qed_led_mode { #define DIRECT_REG_RD(reg_addr) readl((void __iomem *)(reg_addr)) #define QED_COALESCE_MAX 0xFF +#define QED_DEFAULT_RX_USECS 12 /* forward */ struct qed_dev; -- cgit v0.10.2 From ed0dd91515bbe19a19cfccca366cf163ed581cac Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Fri, 21 Oct 2016 04:43:43 -0400 Subject: qede: Reconfigure rss indirection direction table when rss count is updated Rx indirection table entries are in the range [0, (rss_count - 1)]. If user reduces the rss count, the table entries may not be in the ccorrect range. Need to reconfigure the table with new rss_count as a basis. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 7da184c..d47bdaa 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -822,6 +822,13 @@ static int qede_set_channels(struct net_device *dev, edev->req_queues = count; edev->req_num_tx = channels->tx_count; edev->req_num_rx = channels->rx_count; + /* Reset the indirection table if rx queue count is updated */ + if ((edev->req_queues - edev->req_num_tx) != QEDE_RSS_COUNT(edev)) { + edev->rss_params_inited &= ~QEDE_RSS_INDIR_INITED; + memset(&edev->rss_params.rss_ind_table, 0, + sizeof(edev->rss_params.rss_ind_table)); + } + if (netif_running(dev)) qede_reload(edev, NULL, NULL); -- cgit v0.10.2 From 15c6de2c65048d45484e396194ba2598618d4cb4 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Fri, 21 Oct 2016 04:43:44 -0400 Subject: qed: Zero-out the buffer paased to dcbx_query() API qed_dcbx_query_params() implementation populate the values to input buffer based on the dcbx mode and, the current negotiated state/params, the caller of this API need to memset the buffer to zero. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 130da1c..a4789a9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -1190,6 +1190,7 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn, if (!dcbx_info) return -ENOMEM; + memset(dcbx_info, 0, sizeof(*dcbx_info)); rc = qed_dcbx_query_params(p_hwfn, dcbx_info, QED_DCBX_OPERATIONAL_MIB); if (rc) { kfree(dcbx_info); @@ -1225,6 +1226,7 @@ static struct qed_dcbx_get *qed_dcbnl_get_dcbx(struct qed_hwfn *hwfn, if (!dcbx_info) return NULL; + memset(dcbx_info, 0, sizeof(*dcbx_info)); if (qed_dcbx_query_params(hwfn, dcbx_info, type)) { kfree(dcbx_info); return NULL; -- cgit v0.10.2 From fabd545c6d27ac1977fe567c43cd4c72fad04172 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Fri, 21 Oct 2016 04:43:45 -0400 Subject: qede: Fix incorrrect usage of APIs for un-mapping DMA memory Driver uses incorrect APIs to unmap DMA memory which were mapped using dma_map_single(). This patch fixes it to use appropriate APIs for un-mapping DMA memory. Signed-off-by: Manish Chopra Signed-off-by: Yuval Mintz Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index d47bdaa..12251a1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1199,8 +1199,8 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev, } first_bd = (struct eth_tx_1st_bd *)qed_chain_consume(&txq->tx_pbl); - dma_unmap_page(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd), - BD_UNMAP_LEN(first_bd), DMA_TO_DEVICE); + dma_unmap_single(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd), + BD_UNMAP_LEN(first_bd), DMA_TO_DEVICE); txq->sw_tx_cons++; txq->sw_tx_ring[idx].skb = NULL; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index da8ef69..444b271 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -313,8 +313,8 @@ static int qede_free_tx_pkt(struct qede_dev *edev, split_bd_len = BD_UNMAP_LEN(split); bds_consumed++; } - dma_unmap_page(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd), - BD_UNMAP_LEN(first_bd) + split_bd_len, DMA_TO_DEVICE); + dma_unmap_single(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd), + BD_UNMAP_LEN(first_bd) + split_bd_len, DMA_TO_DEVICE); /* Unmap the data of the skb frags */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, bds_consumed++) { @@ -359,8 +359,8 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev, nbd--; } - dma_unmap_page(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd), - BD_UNMAP_LEN(first_bd) + split_bd_len, DMA_TO_DEVICE); + dma_unmap_single(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd), + BD_UNMAP_LEN(first_bd) + split_bd_len, DMA_TO_DEVICE); /* Unmap the data of the skb frags */ for (i = 0; i < nbd; i++) { -- cgit v0.10.2 From a6e2846cacf97d4c70c5e923325b015cfa1e9053 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Fri, 21 Oct 2016 02:09:17 -0400 Subject: bnx2x: Use the correct divisor value for PHC clock readings. Time Sync (PTP) implementation uses the divisor/shift value for converting the clock ticks to nanoseconds. Driver currently defines shift value as 1, this results in the nanoseconds value to be calculated as half the actual value. Hence the user application fails to synchronize the device clock value with the PTP master device clock. Need to use the 'shift' value of 0. Signed-off-by: Sony.Chacko Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 20fe6a8..0cee4c0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -15241,7 +15241,7 @@ static void bnx2x_init_cyclecounter(struct bnx2x *bp) memset(&bp->cyclecounter, 0, sizeof(bp->cyclecounter)); bp->cyclecounter.read = bnx2x_cyclecounter_read; bp->cyclecounter.mask = CYCLECOUNTER_MASK(64); - bp->cyclecounter.shift = 1; + bp->cyclecounter.shift = 0; bp->cyclecounter.mult = 1; } -- cgit v0.10.2 From b678aa578c9e400429e027269e8de2783e5e73ce Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 21 Oct 2016 18:28:25 +0900 Subject: ipv6: do not increment mac header when it's unset Otherwise we'll overflow the integer. This occurs when layer 3 tunneled packets are handed off to the IPv6 layer. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 2160d5d..3815e85 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -456,7 +456,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, skb_network_header(head)[nhoff] = skb_transport_header(head)[0]; memmove(head->head + sizeof(struct frag_hdr), head->head, (head->data - head->head) - sizeof(struct frag_hdr)); - head->mac_header += sizeof(struct frag_hdr); + if (skb_mac_header_was_set(head)) + head->mac_header += sizeof(struct frag_hdr); head->network_header += sizeof(struct frag_hdr); skb_reset_transport_header(head); -- cgit v0.10.2 From 235bde1ed3f0fff0f68f367ec8807b89ea151258 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 21 Oct 2016 09:34:29 -0200 Subject: net: fec: Call swap_buffer() prior to IP header alignment Commit 3ac72b7b63d5 ("net: fec: align IP header in hardware") breaks networking on mx28. There is an erratum on mx28 (ENGR121613 - ENET big endian mode not compatible with ARM little endian) that requires an additional byte-swap operation to workaround this problem. So call swap_buffer() prior to performing the IP header alignment to restore network functionality on mx28. Fixes: 3ac72b7b63d5 ("net: fec: align IP header in hardware") Reported-and-tested-by: Henri Roosen Signed-off-by: Fabio Estevam Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 48a033e..5aa9d4d 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1430,14 +1430,14 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) skb_put(skb, pkt_len - 4); data = skb->data; + if (!is_copybreak && need_swap) + swap_buffer(data, pkt_len); + #if !defined(CONFIG_M5272) if (fep->quirks & FEC_QUIRK_HAS_RACC) data = skb_pull_inline(skb, 2); #endif - if (!is_copybreak && need_swap) - swap_buffer(data, pkt_len); - /* Extract the enhanced buffer descriptor */ ebdp = NULL; if (fep->bufdesc_ex) -- cgit v0.10.2 From a4b8e71b05c27bae6bad3bdecddbc6b68a3ad8cf Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 21 Oct 2016 14:13:24 +0200 Subject: net: sctp, forbid negative length Most of getsockopt handlers in net/sctp/socket.c check len against sizeof some structure like: if (len < sizeof(int)) return -EINVAL; On the first look, the check seems to be correct. But since len is int and sizeof returns size_t, int gets promoted to unsigned size_t too. So the test returns false for negative lengths. Yes, (-1 < sizeof(long)) is false. Fix this in sctp by explicitly checking len < 0 before any getsockopt handler is called. Note that sctp_getsockopt_events already handled the negative case. Since we added the < 0 check elsewhere, this one can be removed. If not checked, this is the result: UBSAN: Undefined behaviour in ../mm/page_alloc.c:2722:19 shift exponent 52 is too large for 32-bit type 'int' CPU: 1 PID: 24535 Comm: syz-executor Not tainted 4.8.1-0-syzkaller #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 0000000000000000 ffff88006d99f2a8 ffffffffb2f7bdea 0000000041b58ab3 ffffffffb4363c14 ffffffffb2f7bcde ffff88006d99f2d0 ffff88006d99f270 0000000000000000 0000000000000000 0000000000000034 ffffffffb5096422 Call Trace: [] ? __ubsan_handle_shift_out_of_bounds+0x29c/0x300 ... [] ? kmalloc_order+0x24/0x90 [] ? kmalloc_order_trace+0x24/0x220 [] ? __kmalloc+0x330/0x540 [] ? sctp_getsockopt_local_addrs+0x174/0xca0 [sctp] [] ? sctp_getsockopt+0x10d/0x1b0 [sctp] [] ? sock_common_getsockopt+0xb9/0x150 [] ? SyS_getsockopt+0x1a5/0x270 Signed-off-by: Jiri Slaby Cc: Vlad Yasevich Cc: Neil Horman Cc: "David S. Miller" Cc: linux-sctp@vger.kernel.org Cc: netdev@vger.kernel.org Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/net/sctp/socket.c b/net/sctp/socket.c index fb02c70..9fbb6fe 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4687,7 +4687,7 @@ static int sctp_getsockopt_disable_fragments(struct sock *sk, int len, static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval, int __user *optlen) { - if (len <= 0) + if (len == 0) return -EINVAL; if (len > sizeof(struct sctp_event_subscribe)) len = sizeof(struct sctp_event_subscribe); @@ -6430,6 +6430,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; + if (len < 0) + return -EINVAL; + lock_sock(sk); switch (optname) { -- cgit v0.10.2 From e1957dba5b54b9c9b1d16f9d5f3f8d41d82bee41 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Oct 2016 07:56:59 +0200 Subject: cfg80211: process events caused by suspend before suspending When suspending without WoWLAN, cfg80211 will ask drivers to disconnect. Even when the driver does this synchronously, and immediately returns with a notification, cfg80211 schedules the handling thereof to a workqueue, and may then call back into the driver when the driver was already suspended/ing. Fix this by processing all events caused by cfg80211_leave_all() directly after that function returns. The driver still needs to do the right thing here and wait for the firmware response, but that is - at least - true for mwifiex where this occurred. Reported-by: Amitkumar Karwar Tested-by: Amitkumar Karwar Signed-off-by: Johannes Berg diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 0082f4b..14b3f00 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -104,13 +104,16 @@ static int wiphy_suspend(struct device *dev) rtnl_lock(); if (rdev->wiphy.registered) { - if (!rdev->wiphy.wowlan_config) + if (!rdev->wiphy.wowlan_config) { cfg80211_leave_all(rdev); + cfg80211_process_rdev_events(rdev); + } if (rdev->ops->suspend) ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config); if (ret == 1) { /* Driver refuse to configure wowlan */ cfg80211_leave_all(rdev); + cfg80211_process_rdev_events(rdev); ret = rdev_suspend(rdev, NULL); } } -- cgit v0.10.2 From b4f7f4ad425a84fd5d40da21aff8681997b25ff9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 21 Oct 2016 15:57:23 +0300 Subject: mac80211: fix some sphinx warnings Signed-off-by: Jani Nikula Signed-off-by: Johannes Berg diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a810dfc..e2dba93 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -811,14 +811,18 @@ enum mac80211_rate_control_flags { * in the control information, and it will be filled by the rate * control algorithm according to what should be sent. For example, * if this array contains, in the format { , } the - * information + * information:: + * * { 3, 2 }, { 2, 2 }, { 1, 4 }, { -1, 0 }, { -1, 0 } + * * then this means that the frame should be transmitted * up to twice at rate 3, up to twice at rate 2, and up to four * times at rate 1 if it doesn't get acknowledged. Say it gets * acknowledged by the peer after the fifth attempt, the status - * information should then contain + * information should then contain:: + * * { 3, 2 }, { 2, 2 }, { 1, 1 }, { -1, 0 } ... + * * since it was transmitted twice at rate 3, twice at rate 2 * and once at rate 1 after which we received an acknowledgement. */ @@ -1168,8 +1172,8 @@ enum mac80211_rx_vht_flags { * @rate_idx: index of data rate into band's supported rates or MCS index if * HT or VHT is used (%RX_FLAG_HT/%RX_FLAG_VHT) * @vht_nss: number of streams (VHT only) - * @flag: %RX_FLAG_* - * @vht_flag: %RX_VHT_FLAG_* + * @flag: %RX_FLAG_\* + * @vht_flag: %RX_VHT_FLAG_\* * @rx_flags: internal RX flags for mac80211 * @ampdu_reference: A-MPDU reference number, must be a different value for * each A-MPDU but the same for each subframe within one A-MPDU @@ -1432,7 +1436,7 @@ enum ieee80211_vif_flags { * @probe_req_reg: probe requests should be reported to mac80211 for this * interface. * @drv_priv: data area for driver use, will always be aligned to - * sizeof(void *). + * sizeof(void \*). * @txq: the multicast data TX queue (if driver uses the TXQ abstraction) */ struct ieee80211_vif { @@ -1743,7 +1747,7 @@ struct ieee80211_sta_rates { * @wme: indicates whether the STA supports QoS/WME (if local devices does, * otherwise always false) * @drv_priv: data area for driver use, will always be aligned to - * sizeof(void *), size is determined in hw information. + * sizeof(void \*), size is determined in hw information. * @uapsd_queues: bitmap of queues configured for uapsd. Only valid * if wme is supported. * @max_sp: max Service Period. Only valid if wme is supported. @@ -2146,12 +2150,12 @@ enum ieee80211_hw_flags { * * @radiotap_mcs_details: lists which MCS information can the HW * reports, by default it is set to _MCS, _GI and _BW but doesn't - * include _FMT. Use %IEEE80211_RADIOTAP_MCS_HAVE_* values, only + * include _FMT. Use %IEEE80211_RADIOTAP_MCS_HAVE_\* values, only * adding _BW is supported today. * * @radiotap_vht_details: lists which VHT MCS information the HW reports, * the default is _GI | _BANDWIDTH. - * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_* values. + * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_\* values. * * @radiotap_timestamp: Information for the radiotap timestamp field; if the * 'units_pos' member is set to a non-negative value it must be set to @@ -2486,6 +2490,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * in the software stack cares about, we will, in the future, have mac80211 * tell the driver which information elements are interesting in the sense * that we want to see changes in them. This will include + * * - a list of information element IDs * - a list of OUIs for the vendor information element * -- cgit v0.10.2 From 92d230dd8cafac417e130e404d4b64eafe2271de Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 22 Oct 2016 14:31:06 +0000 Subject: rocker: fix error return code in rocker_world_check_init() Fix to return error code -EINVAL from the error handling case instead of 0, as done elsewhere in this function. Fixes: e420114eef4a ("rocker: introduce worlds infrastructure") Signed-off-by: Wei Yongjun Acked-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 5424fb3..24b7464 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1471,7 +1471,7 @@ static int rocker_world_check_init(struct rocker_port *rocker_port) if (rocker->wops) { if (rocker->wops->mode != mode) { dev_err(&rocker->pdev->dev, "hardware has ports in different worlds, which is not supported\n"); - return err; + return -EINVAL; } return 0; } -- cgit v0.10.2 From 293de7dee4f9602676846dbeb84b1580123306b4 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 23 Oct 2016 09:28:29 -0700 Subject: doc: update docbook annotations for socket and skb The skbuff and sock structure both had missing parameter annotation values. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 601258f..32810f2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -936,6 +936,7 @@ struct sk_buff_fclones { /** * skb_fclone_busy - check if fclone is busy + * @sk: socket * @skb: buffer * * Returns true if skb is a fast clone, and its clone is not freed. diff --git a/include/net/sock.h b/include/net/sock.h index ebf75db..73c6b00 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -252,6 +252,7 @@ struct sock_common { * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE) * @sk_sndbuf: size of send buffer in bytes + * @sk_padding: unused element for alignment * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) @@ -302,7 +303,8 @@ struct sock_common { * @sk_backlog_rcv: callback to process the backlog * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0 * @sk_reuseport_cb: reuseport group container - */ + * @sk_rcu: used during RCU grace period + */ struct sock { /* * Now struct inet_timewait_sock also uses sock_common, so please just -- cgit v0.10.2 From ecc515d7238f2cffac839069d56dc271141defa0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 24 Oct 2016 01:01:09 +0800 Subject: sctp: fix the panic caused by route update Commit 7303a1475008 ("sctp: identify chunks that need to be fragmented at IP level") made the chunk be fragmented at IP level in the next round if it's size exceed PMTU. But there still is another case, PMTU can be updated if transport's dst expires and transport's pmtu_pending is set in sctp_packet_transmit. If the new PMTU is less than the chunk, the same issue with that commit can be triggered. So we should drop this packet and let it retransmit in another round where it would be fragmented at IP level. This patch is to fix it by checking the chunk size after PMTU may be updated and dropping this packet if it's size exceed PMTU. Fixes: 90017accff61 ("sctp: Add GSO support") Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/net/sctp/output.c b/net/sctp/output.c index 2a5c189..6cb0df8 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -418,6 +418,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) __u8 has_data = 0; int gso = 0; int pktcount = 0; + int auth_len = 0; struct dst_entry *dst; unsigned char *auth = NULL; /* pointer to auth in skb data */ @@ -510,7 +511,12 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) list_for_each_entry(chunk, &packet->chunk_list, list) { int padded = SCTP_PAD4(chunk->skb->len); - if (pkt_size + padded > tp->pathmtu) + if (chunk == packet->auth) + auth_len = padded; + else if (auth_len + padded + packet->overhead > + tp->pathmtu) + goto nomem; + else if (pkt_size + padded > tp->pathmtu) break; pkt_size += padded; } -- cgit v0.10.2 From 10df8e6152c6c400a563a673e9956320bfce1871 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Oct 2016 18:03:06 -0700 Subject: udp: fix IP_CHECKSUM handling First bug was added in commit ad6f939ab193 ("ip: Add offset parameter to ip_cmsg_recv") : Tom missed that ipv4 udp messages could be received on AF_INET6 socket. ip_cmsg_recv(msg, skb) should have been replaced by ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr)); Then commit e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") forgot to adjust the offsets now UDP headers are pulled before skb are put in receive queue. Fixes: ad6f939ab193 ("ip: Add offset parameter to ip_cmsg_recv") Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Signed-off-by: Eric Dumazet Cc: Sam Kumar Cc: Willem de Bruijn Tested-by: Willem de Bruijn Signed-off-by: David S. Miller diff --git a/include/net/ip.h b/include/net/ip.h index c9d0798..5413883 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -578,7 +578,7 @@ int ip_options_rcv_srr(struct sk_buff *skb); */ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); -void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int offset); +void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, @@ -600,7 +600,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { - ip_cmsg_recv_offset(msg, skb, 0); + ip_cmsg_recv_offset(msg, skb, 0, 0); } bool icmp_global_allow(void); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index af49197..b8a2d63 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -98,7 +98,7 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) } static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, - int offset) + int tlen, int offset) { __wsum csum = skb->csum; @@ -106,8 +106,9 @@ static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, return; if (offset != 0) - csum = csum_sub(csum, csum_partial(skb_transport_header(skb), - offset, 0)); + csum = csum_sub(csum, + csum_partial(skb_transport_header(skb) + tlen, + offset, 0)); put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum); } @@ -153,7 +154,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) } void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, - int offset) + int tlen, int offset) { struct inet_sock *inet = inet_sk(skb->sk); unsigned int flags = inet->cmsg_flags; @@ -216,7 +217,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, } if (flags & IP_CMSG_CHECKSUM) - ip_cmsg_recv_checksum(msg, skb, offset); + ip_cmsg_recv_checksum(msg, skb, tlen, offset); } EXPORT_SYMBOL(ip_cmsg_recv_offset); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 311613e..d123d68 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1322,7 +1322,7 @@ try_again: *addr_len = sizeof(*sin); } if (inet->cmsg_flags) - ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr) + off); + ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr), off); err = copied; if (flags & MSG_TRUNC) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9aa7c1c..b2ef061 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -427,7 +427,8 @@ try_again: if (is_udp4) { if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); + ip_cmsg_recv_offset(msg, skb, + sizeof(struct udphdr), off); } else { if (np->rxopt.all) ip6_datagram_recv_specific_ctl(sk, msg, skb); -- cgit v0.10.2 From e52fed7177f74382f742c27de2cc5314790aebb6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 23 Oct 2016 21:32:47 -0700 Subject: netvsc: fix incorrect receive checksum offloading The Hyper-V netvsc driver was looking at the incorrect status bits in the checksum info. It was setting the receive checksum unnecessary flag based on the IP header checksum being correct. The checksum flag is skb is about TCP and UDP checksum status. Because of this bug, any packet received with bad TCP checksum would be passed up the stack and to the application causing data corruption. The problem is reproducible via netcat and netem. This had a side effect of not doing receive checksum offload on IPv6. The driver was also also always doing checksum offload independent of the checksum setting done via ethtool. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5d6e75a..c71d966 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -607,15 +607,18 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, packet->total_data_buflen); skb->protocol = eth_type_trans(skb, net); - if (csum_info) { - /* We only look at the IP checksum here. - * Should we be dropping the packet if checksum - * failed? How do we deal with other checksums - TCP/UDP? - */ - if (csum_info->receive.ip_checksum_succeeded) + + /* skb is already created with CHECKSUM_NONE */ + skb_checksum_none_assert(skb); + + /* + * In Linux, the IP checksum is always checked. + * Do L4 checksum offload if enabled and present. + */ + if (csum_info && (net->features & NETIF_F_RXCSUM)) { + if (csum_info->receive.tcp_checksum_succeeded || + csum_info->receive.udp_checksum_succeeded) skb->ip_summed = CHECKSUM_UNNECESSARY; - else - skb->ip_summed = CHECKSUM_NONE; } if (vlan_tci & VLAN_TAG_PRESENT) -- cgit v0.10.2 From 67f0160fe34ec5391a428603b9832c9f99d8f3a1 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Mon, 24 Oct 2016 08:48:09 +0300 Subject: MAINTAINERS: Update qlogic networking drivers Following Cavium's acquisition of qlogic we need to update all the qlogic drivers maintainer's entries to point to our new e-mail addresses, as well as update some of the driver's maintainers as those are no longer working for Cavium. I would like to thank Sony Chacko and Rajesh Borundia for their support and development of our various networking drivers. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index 1fc66f0a..af299a7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2543,15 +2543,18 @@ S: Supported F: drivers/net/ethernet/broadcom/genet/ BROADCOM BNX2 GIGABIT ETHERNET DRIVER -M: Sony Chacko -M: Dept-HSGLinuxNICDev@qlogic.com +M: Rasesh Mody +M: Harish Patil +M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/bnx2.* F: drivers/net/ethernet/broadcom/bnx2_* BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER -M: Ariel Elior +M: Yuval Mintz +M: Ariel Elior +M: everest-linux-l2@cavium.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/bnx2x/ @@ -2758,7 +2761,9 @@ S: Supported F: drivers/scsi/bfa/ BROCADE BNA 10 GIGABIT ETHERNET DRIVER -M: Rasesh Mody +M: Rasesh Mody +M: Sudarsana Kalluru +M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/brocade/bna/ @@ -8492,11 +8497,10 @@ F: Documentation/devicetree/bindings/net/wireless/ F: drivers/net/wireless/ NETXEN (1/10) GbE SUPPORT -M: Manish Chopra -M: Sony Chacko -M: Rajesh Borundia +M: Manish Chopra +M: Rahul Verma +M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org -W: http://www.qlogic.com S: Supported F: drivers/net/ethernet/qlogic/netxen/ @@ -9873,33 +9877,32 @@ F: Documentation/scsi/LICENSE.qla4xxx F: drivers/scsi/qla4xxx/ QLOGIC QLA3XXX NETWORK DRIVER -M: Jitendra Kalsaria -M: Ron Mercer -M: linux-driver@qlogic.com +M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org S: Supported F: Documentation/networking/LICENSE.qla3xxx F: drivers/net/ethernet/qlogic/qla3xxx.* QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER -M: Dept-GELinuxNICDev@qlogic.com +M: Harish Patil +M: Manish Chopra +M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/qlogic/qlcnic/ QLOGIC QLGE 10Gb ETHERNET DRIVER -M: Harish Patil -M: Sudarsana Kalluru -M: Dept-GELinuxNICDev@qlogic.com -M: linux-driver@qlogic.com +M: Harish Patil +M: Manish Chopra +M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/qlogic/qlge/ QLOGIC QL4xxx ETHERNET DRIVER -M: Yuval Mintz -M: Ariel Elior -M: everest-linux-l2@qlogic.com +M: Yuval Mintz +M: Ariel Elior +M: everest-linux-l2@cavium.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/qlogic/qed/ -- cgit v0.10.2 From 4fc6d239ee5640909932c47f3c7b93dd8e415fea Mon Sep 17 00:00:00 2001 From: Zefir Kurtisi Date: Mon, 24 Oct 2016 12:40:53 +0200 Subject: Revert "at803x: fix suspend/resume for SGMII link" This reverts commit 98267311fe3b334ae7c107fa0e2413adcf3ba735. Suspending the SGMII alongside the copper side made the at803x inaccessable while powered down, e.g. it can't be re-probed after suspend. Signed-off-by: Zefir Kurtisi Signed-off-by: David S. Miller diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index f279a89..cf74d10 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -52,9 +52,6 @@ #define AT803X_DEBUG_REG_5 0x05 #define AT803X_DEBUG_TX_CLK_DLY_EN BIT(8) -#define AT803X_REG_CHIP_CONFIG 0x1f -#define AT803X_BT_BX_REG_SEL 0x8000 - #define ATH8030_PHY_ID 0x004dd076 #define ATH8031_PHY_ID 0x004dd074 #define ATH8035_PHY_ID 0x004dd072 @@ -209,7 +206,6 @@ static int at803x_suspend(struct phy_device *phydev) { int value; int wol_enabled; - int ccr; mutex_lock(&phydev->lock); @@ -225,16 +221,6 @@ static int at803x_suspend(struct phy_device *phydev) phy_write(phydev, MII_BMCR, value); - if (phydev->interface != PHY_INTERFACE_MODE_SGMII) - goto done; - - /* also power-down SGMII interface */ - ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG); - phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr & ~AT803X_BT_BX_REG_SEL); - phy_write(phydev, MII_BMCR, phy_read(phydev, MII_BMCR) | BMCR_PDOWN); - phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr | AT803X_BT_BX_REG_SEL); - -done: mutex_unlock(&phydev->lock); return 0; @@ -243,7 +229,6 @@ done: static int at803x_resume(struct phy_device *phydev) { int value; - int ccr; mutex_lock(&phydev->lock); @@ -251,17 +236,6 @@ static int at803x_resume(struct phy_device *phydev) value &= ~(BMCR_PDOWN | BMCR_ISOLATE); phy_write(phydev, MII_BMCR, value); - if (phydev->interface != PHY_INTERFACE_MODE_SGMII) - goto done; - - /* also power-up SGMII interface */ - ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG); - phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr & ~AT803X_BT_BX_REG_SEL); - value = phy_read(phydev, MII_BMCR) & ~(BMCR_PDOWN | BMCR_ISOLATE); - phy_write(phydev, MII_BMCR, value); - phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr | AT803X_BT_BX_REG_SEL); - -done: mutex_unlock(&phydev->lock); return 0; -- cgit v0.10.2 From f62265b53ef34a372b657c99e23d32e95b464316 Mon Sep 17 00:00:00 2001 From: Zefir Kurtisi Date: Mon, 24 Oct 2016 12:40:54 +0200 Subject: at803x: double check SGMII side autoneg In SGMII mode, we observed an autonegotiation issue after power-down-up cycles where the copper side reports successful link establishment but the SGMII side's link is down. This happened in a setup where the at8031 is connected over SGMII to a eTSEC (fsl gianfar), but so far could not be reproduced with other Ethernet device / driver combinations. This commit adds a wrapper function for at8031 that in case of operating in SGMII mode double checks SGMII link state when generic aneg_done() succeeds. It prints a warning on failure but intentionally does not try to recover from this state. As a result, if you ever see a warning '803x_aneg_done: SGMII link is not ok' you will end up having an Ethernet link up but won't get any data through. This should not happen, if it does, please contact the module maintainer. Signed-off-by: Zefir Kurtisi Signed-off-by: David S. Miller diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index cf74d10..a52b560 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -42,10 +42,18 @@ #define AT803X_MMD_ACCESS_CONTROL 0x0D #define AT803X_MMD_ACCESS_CONTROL_DATA 0x0E #define AT803X_FUNC_DATA 0x4003 +#define AT803X_REG_CHIP_CONFIG 0x1f +#define AT803X_BT_BX_REG_SEL 0x8000 #define AT803X_DEBUG_ADDR 0x1D #define AT803X_DEBUG_DATA 0x1E +#define AT803X_MODE_CFG_MASK 0x0F +#define AT803X_MODE_CFG_SGMII 0x01 + +#define AT803X_PSSR 0x11 /*PHY-Specific Status Register*/ +#define AT803X_PSSR_MR_AN_COMPLETE 0x0200 + #define AT803X_DEBUG_REG_0 0x00 #define AT803X_DEBUG_RX_CLK_DLY_EN BIT(15) @@ -355,6 +363,36 @@ static void at803x_link_change_notify(struct phy_device *phydev) } } +static int at803x_aneg_done(struct phy_device *phydev) +{ + int ccr; + + int aneg_done = genphy_aneg_done(phydev); + if (aneg_done != BMSR_ANEGCOMPLETE) + return aneg_done; + + /* + * in SGMII mode, if copper side autoneg is successful, + * also check SGMII side autoneg result + */ + ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG); + if ((ccr & AT803X_MODE_CFG_MASK) != AT803X_MODE_CFG_SGMII) + return aneg_done; + + /* switch to SGMII/fiber page */ + phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr & ~AT803X_BT_BX_REG_SEL); + + /* check if the SGMII link is OK. */ + if (!(phy_read(phydev, AT803X_PSSR) & AT803X_PSSR_MR_AN_COMPLETE)) { + pr_warn("803x_aneg_done: SGMII link is not ok\n"); + aneg_done = 0; + } + /* switch back to copper page */ + phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr | AT803X_BT_BX_REG_SEL); + + return aneg_done; +} + static struct phy_driver at803x_driver[] = { { /* ATHEROS 8035 */ @@ -406,6 +444,7 @@ static struct phy_driver at803x_driver[] = { .flags = PHY_HAS_INTERRUPT, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, + .aneg_done = at803x_aneg_done, .ack_interrupt = &at803x_ack_interrupt, .config_intr = &at803x_config_intr, } }; -- cgit v0.10.2 From e0f841f5cbf2a195c63f3441f3d8ef1cd2bdeeed Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Mon, 24 Oct 2016 15:44:26 +0200 Subject: macsec: Fix header length if SCI is added if explicitly disabled Even if sending SCIs is explicitly disabled, the code that creates the Security Tag might still decide to add it (e.g. if multiple RX SCs are defined on the MACsec interface). But because the header length so far only depended on the configuration option the SCI overwrote the original frame's contents (EtherType and e.g. the beginning of the IP header) and if encrypted did not visibly end up in the packet, while the SC flag in the TCI field of the Security Tag was still set, resulting in invalid MACsec frames. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Tobias Brunner Acked-by: Sabrina Dubroca Signed-off-by: David S. Miller diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 3ea47f2..d2e61e0 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -397,6 +397,14 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) #define DEFAULT_ENCRYPT false #define DEFAULT_ENCODING_SA 0 +static bool send_sci(const struct macsec_secy *secy) +{ + const struct macsec_tx_sc *tx_sc = &secy->tx_sc; + + return tx_sc->send_sci || + (secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb); +} + static sci_t make_sci(u8 *addr, __be16 port) { sci_t sci; @@ -437,15 +445,15 @@ static unsigned int macsec_extra_len(bool sci_present) /* Fill SecTAG according to IEEE 802.1AE-2006 10.5.3 */ static void macsec_fill_sectag(struct macsec_eth_header *h, - const struct macsec_secy *secy, u32 pn) + const struct macsec_secy *secy, u32 pn, + bool sci_present) { const struct macsec_tx_sc *tx_sc = &secy->tx_sc; - memset(&h->tci_an, 0, macsec_sectag_len(tx_sc->send_sci)); + memset(&h->tci_an, 0, macsec_sectag_len(sci_present)); h->eth.h_proto = htons(ETH_P_MACSEC); - if (tx_sc->send_sci || - (secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb)) { + if (sci_present) { h->tci_an |= MACSEC_TCI_SC; memcpy(&h->secure_channel_id, &secy->sci, sizeof(h->secure_channel_id)); @@ -650,6 +658,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; struct macsec_dev *macsec = macsec_priv(dev); + bool sci_present; u32 pn; secy = &macsec->secy; @@ -687,7 +696,8 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, unprotected_len = skb->len; eth = eth_hdr(skb); - hh = (struct macsec_eth_header *)skb_push(skb, macsec_extra_len(tx_sc->send_sci)); + sci_present = send_sci(secy); + hh = (struct macsec_eth_header *)skb_push(skb, macsec_extra_len(sci_present)); memmove(hh, eth, 2 * ETH_ALEN); pn = tx_sa_update_pn(tx_sa, secy); @@ -696,7 +706,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, kfree_skb(skb); return ERR_PTR(-ENOLINK); } - macsec_fill_sectag(hh, secy, pn); + macsec_fill_sectag(hh, secy, pn, sci_present); macsec_set_shortlen(hh, unprotected_len - 2 * ETH_ALEN); skb_put(skb, secy->icv_len); @@ -726,10 +736,10 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, skb_to_sgvec(skb, sg, 0, skb->len); if (tx_sc->encrypt) { - int len = skb->len - macsec_hdr_len(tx_sc->send_sci) - + int len = skb->len - macsec_hdr_len(sci_present) - secy->icv_len; aead_request_set_crypt(req, sg, sg, len, iv); - aead_request_set_ad(req, macsec_hdr_len(tx_sc->send_sci)); + aead_request_set_ad(req, macsec_hdr_len(sci_present)); } else { aead_request_set_crypt(req, sg, sg, 0, iv); aead_request_set_ad(req, skb->len - secy->icv_len); -- cgit v0.10.2 From e30520c2b075fae3977d482a31b54b0256160a57 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Oct 2016 17:54:18 +0200 Subject: kalmia: avoid potential uninitialized variable use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kalmia_send_init_packet() returns zero or a negative return code, but gcc has no way of knowing that there cannot be a positive return code, so it determines that copying the ethernet address at the end of kalmia_bind() will access uninitialized data: drivers/net/usb/kalmia.c: In function ‘kalmia_bind’: arch/x86/include/asm/string_32.h:78:22: error: ‘*((void *)ðernet_addr+4)’ may be used uninitialized in this function [-Werror=maybe-uninitialized] *((short *)to + 2) = *((short *)from + 2); ^ drivers/net/usb/kalmia.c:138:5: note: ‘*((void *)ðernet_addr+4)’ was declared here This warning is harmless, but for consistency, we should make the check for the return code match what the driver does everywhere else and just progate it, which then gets rid of the warning. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c index 5662bab..3e37724 100644 --- a/drivers/net/usb/kalmia.c +++ b/drivers/net/usb/kalmia.c @@ -151,7 +151,7 @@ kalmia_bind(struct usbnet *dev, struct usb_interface *intf) status = kalmia_init_and_get_ethernet_addr(dev, ethernet_addr); - if (status < 0) { + if (status) { usb_set_intfdata(intf, NULL); usb_driver_release_interface(driver_of(intf), intf); return status; -- cgit v0.10.2 From 830218c1add1da16519b71909e5cf21522b7d062 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 24 Oct 2016 10:52:35 -0700 Subject: net: ipv6: Fix processing of RAs in presence of VRF rt6_add_route_info and rt6_add_dflt_router were updated to pull the FIB table from the device index, but the corresponding rt6_get_route_info and rt6_get_dflt_router functions were not leading to the failure to process RA's: ICMPv6: RA: ndisc_router_discovery failed to add default route Fix the 'get' functions by using the table id associated with the device when applicable. Also, now that default routes can be added to tables other than the default table, rt6_purge_dflt_routers needs to be updated as well to look at all tables. To handle that efficiently, add a flag to the table denoting if it is has a default route via RA. Fixes: ca254490c8dfd ("net: Add VRF support to IPv6 stack") Signed-off-by: David Ahern Signed-off-by: David S. Miller diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index fb961a5..a74e2aa 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -230,6 +230,8 @@ struct fib6_table { rwlock_t tb6_lock; struct fib6_node tb6_root; struct inet_peer_base tb6_peers; + unsigned int flags; +#define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; #define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC diff --git a/net/ipv6/route.c b/net/ipv6/route.c index bdbc38e..3ac19eb8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -102,11 +102,13 @@ static int rt6_score_route(struct rt6_info *rt, int oif, int strict); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, + struct net_device *dev, unsigned int pref); static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex); + const struct in6_addr *gwaddr, + struct net_device *dev); #endif struct uncached_list { @@ -803,7 +805,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, rt = rt6_get_dflt_router(gwaddr, dev); else rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, - gwaddr, dev->ifindex); + gwaddr, dev); if (rt && !lifetime) { ip6_del_rt(rt); @@ -811,8 +813,8 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (!rt && lifetime) - rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, - pref); + rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, + dev, pref); else if (rt) rt->rt6i_flags = RTF_ROUTEINFO | (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); @@ -2325,13 +2327,16 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort) #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex) + const struct in6_addr *gwaddr, + struct net_device *dev) { + u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO; + int ifindex = dev->ifindex; struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; - table = fib6_get_table(net, RT6_TABLE_INFO); + table = fib6_get_table(net, tb_id); if (!table) return NULL; @@ -2357,12 +2362,13 @@ out: static struct rt6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, + struct net_device *dev, unsigned int pref) { struct fib6_config cfg = { .fc_metric = IP6_RT_PRIO_USER, - .fc_ifindex = ifindex, + .fc_ifindex = dev->ifindex, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), @@ -2371,7 +2377,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_nlinfo.nl_net = net, }; - cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO; + cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO, cfg.fc_dst = *prefix; cfg.fc_gateway = *gwaddr; @@ -2381,16 +2387,17 @@ static struct rt6_info *rt6_add_route_info(struct net *net, ip6_route_add(&cfg); - return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); + return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev); } #endif struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) { + u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT; struct rt6_info *rt; struct fib6_table *table; - table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); + table = fib6_get_table(dev_net(dev), tb_id); if (!table) return NULL; @@ -2424,20 +2431,20 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, cfg.fc_gateway = *gwaddr; - ip6_route_add(&cfg); + if (!ip6_route_add(&cfg)) { + struct fib6_table *table; + + table = fib6_get_table(dev_net(dev), cfg.fc_table); + if (table) + table->flags |= RT6_TABLE_HAS_DFLT_ROUTER; + } return rt6_get_dflt_router(gwaddr, dev); } -void rt6_purge_dflt_routers(struct net *net) +static void __rt6_purge_dflt_routers(struct fib6_table *table) { struct rt6_info *rt; - struct fib6_table *table; - - /* NOTE: Keep consistent with rt6_get_dflt_router */ - table = fib6_get_table(net, RT6_TABLE_DFLT); - if (!table) - return; restart: read_lock_bh(&table->tb6_lock); @@ -2451,6 +2458,27 @@ restart: } } read_unlock_bh(&table->tb6_lock); + + table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER; +} + +void rt6_purge_dflt_routers(struct net *net) +{ + struct fib6_table *table; + struct hlist_head *head; + unsigned int h; + + rcu_read_lock(); + + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { + head = &net->ipv6.fib_table_hash[h]; + hlist_for_each_entry_rcu(table, head, tb6_hlist) { + if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER) + __rt6_purge_dflt_routers(table); + } + } + + rcu_read_unlock(); } static void rtmsg_to_fib6_config(struct net *net, -- cgit v0.10.2 From d5d32e4b76687f4df9ad3ba8d3702b7347f51fa6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 24 Oct 2016 12:27:23 -0700 Subject: net: ipv6: Do not consider link state for nexthop validation Similar to IPv4, do not consider link state when validating next hops. Currently, if the link is down default routes can fail to insert: $ ip -6 ro add vrf blue default via 2100:2::64 dev eth2 RTNETLINK answers: No route to host With this patch the command succeeds. Fixes: 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups") Signed-off-by: David Ahern Signed-off-by: David S. Miller diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index e0cd318..f83e78d 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -32,6 +32,7 @@ struct route_info { #define RT6_LOOKUP_F_SRCPREF_TMP 0x00000008 #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 +#define RT6_LOOKUP_F_IGNORE_LINKSTATE 0x00000040 /* We do not (yet ?) support IPv6 jumbograms (RFC 2675) * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3ac19eb8..947ed1d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -658,7 +658,8 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, struct net_device *dev = rt->dst.dev; if (dev && !netif_carrier_ok(dev) && - idev->cnf.ignore_routes_with_linkdown) + idev->cnf.ignore_routes_with_linkdown && + !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) goto out; if (rt6_check_expired(rt)) @@ -1052,6 +1053,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int strict = 0; strict |= flags & RT6_LOOKUP_F_IFACE; + strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; if (net->ipv6.devconf_all->forwarding == 0) strict |= RT6_LOOKUP_F_REACHABLE; @@ -1791,7 +1793,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, }; struct fib6_table *table; struct rt6_info *rt; - int flags = RT6_LOOKUP_F_IFACE; + int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE; table = fib6_get_table(net, cfg->fc_table); if (!table) -- cgit v0.10.2 From bc72f3dd89e087e33afe8c490fbe132e3dcd9afe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Oct 2016 23:40:30 +0200 Subject: flow_dissector: fix vlan tag handling gcc warns about an uninitialized pointer dereference in the vlan priority handling: net/core/flow_dissector.c: In function '__skb_flow_dissect': net/core/flow_dissector.c:281:61: error: 'vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] As pointed out by Jiri Pirko, the variable is never actually used without being initialized first as the only way it end up uninitialized is with skb_vlan_tag_present(skb)==true, and that means it does not get accessed. However, the warning hints at some related issues that I'm addressing here: - the second check for the vlan tag is different from the first one that tests the skb for being NULL first, causing both the warning and a possible NULL pointer dereference that was not entirely fixed. - The same patch that introduced the NULL pointer check dropped an earlier optimization that skipped the repeated check of the protocol type - The local '_vlan' variable is referenced through the 'vlan' pointer but the variable has gone out of scope by the time that it is accessed, causing undefined behavior Caching the result of the 'skb && skb_vlan_tag_present(skb)' check in a local variable allows the compiler to further optimize the later check. With those changes, the warning also disappears. Fixes: 3805a938a6c2 ("flow_dissector: Check skb for VLAN only if skb specified.") Fixes: d5709f7ab776 ("flow_dissector: For stripped vlan, get vlan info from skb->vlan_tci") Signed-off-by: Arnd Bergmann Acked-by: Jiri Pirko Acked-by: Eric Garver Signed-off-by: David S. Miller diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 44e6ba9..ab193e5 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -246,13 +246,13 @@ ipv6: case htons(ETH_P_8021AD): case htons(ETH_P_8021Q): { const struct vlan_hdr *vlan; + struct vlan_hdr _vlan; + bool vlan_tag_present = skb && skb_vlan_tag_present(skb); - if (skb && skb_vlan_tag_present(skb)) + if (vlan_tag_present) proto = skb->protocol; - if (eth_type_vlan(proto)) { - struct vlan_hdr _vlan; - + if (!vlan_tag_present || eth_type_vlan(skb->protocol)) { vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan); if (!vlan) @@ -270,7 +270,7 @@ ipv6: FLOW_DISSECTOR_KEY_VLAN, target_container); - if (skb_vlan_tag_present(skb)) { + if (vlan_tag_present) { key_vlan->vlan_id = skb_vlan_tag_get_id(skb); key_vlan->vlan_priority = (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT); -- cgit v0.10.2 From c121f72a66c5f92fbe2fc53baa274eef39875cec Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Oct 2016 23:46:18 +0100 Subject: net: bgmac: fix spelling mistake: "connecton" -> "connection" trivial fix to spelling mistake in dev_err message Signed-off-by: Colin Ian King Acked-by: Jon Mason Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 856379c..31ca204 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1449,7 +1449,7 @@ static int bgmac_phy_connect(struct bgmac *bgmac) phy_dev = phy_connect(bgmac->net_dev, bus_id, &bgmac_adjust_link, PHY_INTERFACE_MODE_MII); if (IS_ERR(phy_dev)) { - dev_err(bgmac->dev, "PHY connecton failed\n"); + dev_err(bgmac->dev, "PHY connection failed\n"); return PTR_ERR(phy_dev); } -- cgit v0.10.2 From a3b8cb1f84a0de95323902c76bab245675d6d218 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 22 Sep 2016 15:06:52 -0700 Subject: ixgbe: fix panic when using macvlan with l2-fwd-offload enabled Fix NULL pointer dereference in the case where a macvlan interface is brought up while the PF is still down: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: [] ixgbe_alloc_rx_buffers+0x42/0x1a0 [ixgbe] Call Trace: [] ixgbe_configure_rx_ring+0x2eb/0x3d0 [ixgbe] [] ixgbe_fwd_ring_up+0xd1/0x380 [ixgbe] [] ixgbe_fwd_add+0x149/0x230 [ixgbe] [] macvlan_open+0x260/0x2b0 [macvlan] Reported-by: Matthew Garrett Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index a244d9a..bd93d82 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9135,10 +9135,14 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) goto fwd_add_err; fwd_adapter->pool = pool; fwd_adapter->real_adapter = adapter; - err = ixgbe_fwd_ring_up(vdev, fwd_adapter); - if (err) - goto fwd_add_err; - netif_tx_start_all_queues(vdev); + + if (netif_running(pdev)) { + err = ixgbe_fwd_ring_up(vdev, fwd_adapter); + if (err) + goto fwd_add_err; + netif_tx_start_all_queues(vdev); + } + return fwd_adapter; fwd_add_err: /* unwind counter and free adapter struct */ -- cgit v0.10.2 From ea6acb7ef78960e4b6f1cd8c4162a5e490e83dcd Mon Sep 17 00:00:00 2001 From: David Ertman Date: Tue, 20 Sep 2016 07:10:50 -0700 Subject: i40e: Fix configure TCs after initial DCB disable in commit a036244c068612a43fa8c0f33a0eb4daa4d8dba0 a fix was put into place to avoid a kernel panic when a non- supported traffic class configuration was put into place and then lldp was enabled/disabled on the link partner switch. This fix caused it to be necessary to unload/reload the driver to reenable DCB once a supported TC config was in place. The root cause of the original panic was that the function i40e_pf_get_default_tc was allowing for a default TC other than TC 0, and only TC 0 is supported as a default. This patch removes the get_default_tc function and replaces it with a #define since there is only one TC supported as a default. Change-Id: I448371974e946386d0a7718d73668b450b7c72ef Signed-off-by: Dave Ertman Tested-by: Ronald Bynoe Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 2030d7c..6d61e44 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -92,6 +92,7 @@ #define I40E_AQ_LEN 256 #define I40E_AQ_WORK_LIMIT 66 /* max number of VFs + a little */ #define I40E_MAX_USER_PRIORITY 8 +#define I40E_DEFAULT_TRAFFIC_CLASS BIT(0) #define I40E_DEFAULT_MSG_ENABLE 4 #define I40E_QUEUE_WAIT_RETRY_LIMIT 10 #define I40E_INT_NAME_STR_LEN (IFNAMSIZ + 16) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ac1faee..050d005 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4641,29 +4641,6 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) } /** - * i40e_pf_get_default_tc - Get bitmap for first enabled TC - * @pf: PF being queried - * - * Return a bitmap for first enabled traffic class for this PF. - **/ -static u8 i40e_pf_get_default_tc(struct i40e_pf *pf) -{ - u8 enabled_tc = pf->hw.func_caps.enabled_tcmap; - u8 i = 0; - - if (!enabled_tc) - return 0x1; /* TC0 */ - - /* Find the first enabled TC */ - for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { - if (enabled_tc & BIT(i)) - break; - } - - return BIT(i); -} - -/** * i40e_pf_get_pf_tc_map - Get bitmap for enabled traffic classes * @pf: PF being queried * @@ -4673,7 +4650,7 @@ static u8 i40e_pf_get_tc_map(struct i40e_pf *pf) { /* If DCB is not enabled for this PF then just return default TC */ if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) - return i40e_pf_get_default_tc(pf); + return I40E_DEFAULT_TRAFFIC_CLASS; /* SFP mode we want PF to be enabled for all TCs */ if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) @@ -4683,7 +4660,7 @@ static u8 i40e_pf_get_tc_map(struct i40e_pf *pf) if (pf->hw.func_caps.iscsi) return i40e_get_iscsi_tc_map(pf); else - return i40e_pf_get_default_tc(pf); + return I40E_DEFAULT_TRAFFIC_CLASS; } /** @@ -5029,7 +5006,7 @@ static void i40e_dcb_reconfigure(struct i40e_pf *pf) if (v == pf->lan_vsi) tc_map = i40e_pf_get_tc_map(pf); else - tc_map = i40e_pf_get_default_tc(pf); + tc_map = I40E_DEFAULT_TRAFFIC_CLASS; #ifdef I40E_FCOE if (pf->vsi[v]->type == I40E_VSI_FCOE) tc_map = i40e_get_fcoe_tc_map(pf); @@ -5717,7 +5694,7 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, u8 type; /* Not DCB capable or capability disabled */ - if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) + if (!(pf->flags & I40E_FLAG_DCB_CAPABLE)) return ret; /* Ignore if event is not for Nearest Bridge */ -- cgit v0.10.2 From 4c95aa5d8fc34be18fcab01b1c6251c8c2b61520 Mon Sep 17 00:00:00 2001 From: Guilherme G Piccoli Date: Thu, 22 Sep 2016 10:03:58 -0300 Subject: i40e: disable MSI-X interrupts if we cannot reserve enough vectors If we fail on allocating enough MSI-X interrupts, we should disable them since they were previously enabled in this point of code. Not disabling them can lead to WARN_ON() being triggered and subsequent failure in enabling MSI as a fallback; the below message was shown without this patch while we played with interrupt allocation in i40e driver: [ 21.461346] sysfs: cannot create duplicate filename '/devices/pci0007:00/0007:00:00.0/0007:01:00.3/msi_irqs' [ 21.461459] ------------[ cut here ]------------ [ 21.461514] WARNING: CPU: 64 PID: 1155 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x88/0xc0 Also, we noticed that without this patch, if we modprobe the module without enough MSI-X interrupts (triggering the above warning), unload the module and re-load it again, we got a crash on the system. Signed-off-by: Guilherme G Piccoli Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 050d005..6abc130 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7684,6 +7684,7 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->flags &= ~I40E_FLAG_MSIX_ENABLED; kfree(pf->msix_entries); pf->msix_entries = NULL; + pci_disable_msix(pf->pdev); return -ENODEV; } else if (v_actual == I40E_MIN_MSIX) { -- cgit v0.10.2 From 9ee7837449b3d6f0fcf9132c6b5e5aaa58cc67d4 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 24 Oct 2016 20:18:27 -0400 Subject: net sched filters: fix notification of filter delete with proper handle Daniel says: While trying out [1][2], I noticed that tc monitor doesn't show the correct handle on delete: $ tc monitor qdisc clsact ffff: dev eno1 parent ffff:fff1 filter dev eno1 ingress protocol all pref 49152 bpf handle 0x2a [...] deleted filter dev eno1 ingress protocol all pref 49152 bpf handle 0xf3be0c80 some context to explain the above: The user identity of any tc filter is represented by a 32-bit identifier encoded in tcm->tcm_handle. Example 0x2a in the bpf filter above. A user wishing to delete, get or even modify a specific filter uses this handle to reference it. Every classifier is free to provide its own semantics for the 32 bit handle. Example: classifiers like u32 use schemes like 800:1:801 to describe the semantics of their filters represented as hash table, bucket and node ids etc. Classifiers also have internal per-filter representation which is different from this externally visible identity. Most classifiers set this internal representation to be a pointer address (which allows fast retrieval of said filters in their implementations). This internal representation is referenced with the "fh" variable in the kernel control code. When a user successfuly deletes a specific filter, by specifying the correct tcm->tcm_handle, an event is generated to user space which indicates which specific filter was deleted. Before this patch, the "fh" value was sent to user space as the identity. As an example what is shown in the sample bpf filter delete event above is 0xf3be0c80. This is infact a 32-bit truncation of 0xffff8807f3be0c80 which happens to be a 64-bit memory address of the internal filter representation (address of the corresponding filter's struct cls_bpf_prog); After this patch the appropriate user identifiable handle as encoded in the originating request tcm->tcm_handle is generated in the event. One of the cardinal rules of netlink rules is to be able to take an event (such as a delete in this case) and reflect it back to the kernel and successfully delete the filter. This patch achieves that. Note, this issue has existed since the original TC action infrastructure code patch back in 2004 as found in: https://git.kernel.org/cgit/linux/kernel/git/history/history.git/commit/ [1] http://patchwork.ozlabs.org/patch/682828/ [2] http://patchwork.ozlabs.org/patch/682829/ Fixes: 4e54c4816bfe ("[NET]: Add tc extensions infrastructure.") Reported-by: Daniel Borkmann Acked-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2ee29a3..2b2a797 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -345,7 +345,8 @@ replay: if (err == 0) { struct tcf_proto *next = rtnl_dereference(tp->next); - tfilter_notify(net, skb, n, tp, fh, + tfilter_notify(net, skb, n, tp, + t->tcm_handle, RTM_DELTFILTER, false); if (tcf_destroy(tp, false)) RCU_INIT_POINTER(*back, next); -- cgit v0.10.2 From 599b076d15ee3ead7af20fc907079df00b2d59a0 Mon Sep 17 00:00:00 2001 From: Huaibin Wang Date: Mon, 26 Sep 2016 09:51:18 +0200 Subject: i40e: fix call of ndo_dflt_bridge_getlink() Order of arguments is wrong. The wrong code has been introduced by commit 7d4f8d871ab1, but is compiled only since commit 9df70b66418e. Note that this may break netlink dumps. Fixes: 9df70b66418e ("i40e: Remove incorrect #ifdef's") Fixes: 7d4f8d871ab1 ("switchdev; add VLAN support for port's bridge_getlink") CC: Carolyn Wyborny Signed-off-by: Huaibin Wang Signed-off-by: Nicolas Dichtel Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6abc130..31c97e3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9034,7 +9034,7 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return 0; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode, - nlflags, 0, 0, filter_mask, NULL); + 0, 0, nlflags, filter_mask, NULL); } /* Hardware supports L4 tunnel length of 128B (=2^7) which includes -- cgit v0.10.2 From 2083d36790e328a364896a2833fbedbf6e1d2317 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 25 Oct 2016 11:25:56 +0200 Subject: mlxsw: spectrum_router: Save requested prefix bitlist when creating tree Currently, the prefix bitlist is not saved for LPM trees, causing the compare to always fail which causes the tree to be destroyed and created for every inserted and removed FIB entry. So fix this by saving the bitlist as it should have been done from the very beginning. Fixes: 53342023eed9 ("mlxsw: spectrum_router: Implement LPM trees management") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index f3d50d3..4cff4c0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -320,6 +320,8 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, lpm_tree); if (err) goto err_left_struct_set; + memcpy(&lpm_tree->prefix_usage, prefix_usage, + sizeof(lpm_tree->prefix_usage)); return lpm_tree; err_left_struct_set: -- cgit v0.10.2 From 8b99becdc89ee7635137725e9fa76f304bb168f5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 25 Oct 2016 11:25:57 +0200 Subject: mlxsw: spectrum_router: Compare only trees which are in use during tree get Only trees which are in use should be compared to requested prefix usage. Fixes: 53342023eed9 ("mlxsw: spectrum_router: Implement LPM trees management") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4cff4c0..4573da2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -345,7 +345,8 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { lpm_tree = &mlxsw_sp->router.lpm_trees[i]; - if (lpm_tree->proto == proto && + if (lpm_tree->ref_count != 0 && + lpm_tree->proto == proto && mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage, prefix_usage)) goto inc_ref_count; -- cgit v0.10.2 From bf911e985d6bbaa328c20c3e05f4eb03de11fdd6 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 25 Oct 2016 14:27:39 -0200 Subject: sctp: validate chunk len before actually using it Andrey Konovalov reported that KASAN detected that SCTP was using a slab beyond the boundaries. It was caused because when handling out of the blue packets in function sctp_sf_ootb() it was checking the chunk len only after already processing the first chunk, validating only for the 2nd and subsequent ones. The fix is to just move the check upwards so it's also validated for the 1st chunk. Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 026e3bc..8ec20a6 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3422,6 +3422,12 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); + /* Report violation if chunk len overflows */ + ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); + if (ch_end > skb_tail_pointer(skb)) + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); + /* Now that we know we at least have a chunk header, * do things that are type appropriate. */ @@ -3453,12 +3459,6 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, } } - /* Report violation if chunk len overflows */ - ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); - if (ch_end > skb_tail_pointer(skb)) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); - ch = (sctp_chunkhdr_t *) ch_end; } while (ch_end < skb_tail_pointer(skb)); -- cgit v0.10.2 From b47bd6ea40636362a8b6605de51207cc387ba0b8 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 25 Oct 2016 18:36:24 +0300 Subject: {net, ib}/mlx5: Make cache line size determination at runtime. ARM 64B cache line systems have L1_CACHE_BYTES set to 128. cache_line_size() will return the correct size. Fixes: cf50b5efa2fe('net/mlx5_core/ib: New device capabilities handling.') Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2217477..63036c7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1019,7 +1019,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf)) resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); - resp.cache_line_size = L1_CACHE_BYTES; + resp.cache_line_size = cache_line_size(); resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 41f4c2a..7ce97da 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -52,7 +52,6 @@ enum { enum { MLX5_IB_SQ_STRIDE = 6, - MLX5_IB_CACHE_LINE_SIZE = 64, }; static const u32 mlx5_ib_opcode[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 6cb3830..2c6e3c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -41,6 +41,13 @@ #include "mlx5_core.h" +struct mlx5_db_pgdir { + struct list_head list; + unsigned long *bitmap; + __be32 *db_page; + dma_addr_t db_dma; +}; + /* Handling for queue buffers -- we allocate a bunch of memory and * register it in a memory region at HCA virtual address 0. */ @@ -102,17 +109,28 @@ EXPORT_SYMBOL_GPL(mlx5_buf_free); static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev, int node) { + u32 db_per_page = PAGE_SIZE / cache_line_size(); struct mlx5_db_pgdir *pgdir; pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL); if (!pgdir) return NULL; - bitmap_fill(pgdir->bitmap, MLX5_DB_PER_PAGE); + pgdir->bitmap = kcalloc(BITS_TO_LONGS(db_per_page), + sizeof(unsigned long), + GFP_KERNEL); + + if (!pgdir->bitmap) { + kfree(pgdir); + return NULL; + } + + bitmap_fill(pgdir->bitmap, db_per_page); pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE, &pgdir->db_dma, node); if (!pgdir->db_page) { + kfree(pgdir->bitmap); kfree(pgdir); return NULL; } @@ -123,18 +141,19 @@ static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev, static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir, struct mlx5_db *db) { + u32 db_per_page = PAGE_SIZE / cache_line_size(); int offset; int i; - i = find_first_bit(pgdir->bitmap, MLX5_DB_PER_PAGE); - if (i >= MLX5_DB_PER_PAGE) + i = find_first_bit(pgdir->bitmap, db_per_page); + if (i >= db_per_page) return -ENOMEM; __clear_bit(i, pgdir->bitmap); db->u.pgdir = pgdir; db->index = i; - offset = db->index * L1_CACHE_BYTES; + offset = db->index * cache_line_size(); db->db = pgdir->db_page + offset / sizeof(*pgdir->db_page); db->dma = pgdir->db_dma + offset; @@ -181,14 +200,16 @@ EXPORT_SYMBOL_GPL(mlx5_db_alloc); void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) { + u32 db_per_page = PAGE_SIZE / cache_line_size(); mutex_lock(&dev->priv.pgdir_mutex); __set_bit(db->index, db->u.pgdir->bitmap); - if (bitmap_full(db->u.pgdir->bitmap, MLX5_DB_PER_PAGE)) { + if (bitmap_full(db->u.pgdir->bitmap, db_per_page)) { dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, db->u.pgdir->db_page, db->u.pgdir->db_dma); list_del(&db->u.pgdir->list); + kfree(db->u.pgdir->bitmap); kfree(db->u.pgdir); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 85c4786..5dbda60 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -626,10 +626,6 @@ struct mlx5_db { }; enum { - MLX5_DB_PER_PAGE = PAGE_SIZE / L1_CACHE_BYTES, -}; - -enum { MLX5_COMP_EQ_SIZE = 1024, }; @@ -638,13 +634,6 @@ enum { MLX5_PTYS_EN = 1 << 2, }; -struct mlx5_db_pgdir { - struct list_head list; - DECLARE_BITMAP(bitmap, MLX5_DB_PER_PAGE); - __be32 *db_page; - dma_addr_t db_dma; -}; - typedef void (*mlx5_cmd_cbk_t)(int status, void *context); struct mlx5_cmd_work_ent { -- cgit v0.10.2 From bba1574c2f11d674bf343f3cf307b33d19f73d9d Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 25 Oct 2016 18:36:25 +0300 Subject: net/mlx5: Always Query HCA caps after setting them Always query the HCA caps after setting them to update the capablities data structures. Not doing so results in incorrect capabilities being reported including max_dc, max_qp and several others. Fixes: 59211bd3b632 ("net/mlx5: Split the load/unload flow into hardware and software flows") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d9c3c70..8a63910 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -844,12 +844,6 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) struct pci_dev *pdev = dev->pdev; int err; - err = mlx5_query_hca_caps(dev); - if (err) { - dev_err(&pdev->dev, "query hca failed\n"); - goto out; - } - err = mlx5_query_board_id(dev); if (err) { dev_err(&pdev->dev, "query board id failed\n"); @@ -1023,6 +1017,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_start_health_poll(dev); + err = mlx5_query_hca_caps(dev); + if (err) { + dev_err(&pdev->dev, "query hca failed\n"); + goto err_stop_poll; + } + if (boot && mlx5_init_once(dev, priv)) { dev_err(&pdev->dev, "sw objs init failed\n"); goto err_stop_poll; -- cgit v0.10.2 From eccec8da3b4e6f403040c7187338a46d2ea27c20 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 25 Oct 2016 18:36:26 +0300 Subject: net/mlx5: Keep autogroups list ordered Finding a new autogroup range is done by going over a group list sorted by each group start index. The search is stopped after finding the first free range. Adding the newly created group to the list is wrongly added to the end of the list regardless of its start index as the parameter of where to insert it is ignored. This commit makes sure to use that unused parameter to insert it where requested. Fixes: f0d22d187473 ('net/mlx5_core: Introduce flow steering autogrouped flow table') Signed-off-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 5da2cc8..17559c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -879,7 +879,7 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table * tree_init_node(&fg->node, !is_auto_fg, del_flow_group); tree_add_node(&fg->node, &ft->node); /* Add node to group list */ - list_add(&fg->node.list, ft->node.children.prev); + list_add(&fg->node.list, prev_fg); return fg; } @@ -893,7 +893,7 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, return ERR_PTR(-EPERM); lock_ref_node(&ft->node); - fg = create_flow_group_common(ft, fg_in, &ft->node.children, false); + fg = create_flow_group_common(ft, fg_in, ft->node.children.prev, false); unlock_ref_node(&ft->node); return fg; @@ -1012,7 +1012,7 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, u32 *match_criteria) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct list_head *prev = &ft->node.children; + struct list_head *prev = ft->node.children.prev; unsigned int candidate_index = 0; struct mlx5_flow_group *fg; void *match_criteria_addr; -- cgit v0.10.2 From 32dba76a7a3104b2815f31b1755f4c0ba1f01a78 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 25 Oct 2016 18:36:27 +0300 Subject: net/mlx5: Fix autogroups groups num not decreasing Autogroups groups num is increased when creating a new flow group, but is never decreased. Now decreasing it when deleting a flow group. Fixes: f0d22d187473 ('net/mlx5_core: Introduce flow steering autogrouped flow table') Signed-off-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 17559c8..8969604 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -436,6 +436,9 @@ static void del_flow_group(struct fs_node *node) fs_get_obj(ft, fg->node.parent); dev = get_dev(&ft->node); + if (ft->autogroup.active) + ft->autogroup.num_groups--; + if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", fg->id, ft->id); -- cgit v0.10.2 From e83d6955fe422f120b0e98e4f02496af89845eef Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 25 Oct 2016 18:36:28 +0300 Subject: net/mlx5: Correctly initialize last use of flow counters Currently, last use timestamp is initialized to zero. This is not the expected value by higher layers such as when we do TC action offloading. To fix that, set it to the current time, e.g when the counter/rule is offloaded. This is the same behaviour of non-offloaded TC actions. Fixes: 43a335e055bb ('mlx5_core: Flow counters infrastructure') Signed-off-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 3a9195b..3b026c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -218,6 +218,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) goto err_out; if (aging) { + counter->cache.lastuse = jiffies; counter->aging = true; spin_lock(&fc_stats->addlist_lock); -- cgit v0.10.2 From 2b029556667a7fc1aea731c5828e501656f87653 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 25 Oct 2016 18:36:29 +0300 Subject: net/mlx5e: Choose best nearest LRO timeout Instead of predicting the index of the wanted LRO timeout value from hardware capabilities, look for the nearest LRO timeout value. Fixes: 5c50368f3831 ('net/mlx5e: Light-weight netdev open/stop') Signed-off-by: Saeed Mahameed Signed-off-by: Mohamad Haj Yahia Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 460363b..7a43502 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -85,6 +85,9 @@ #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (128) #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) +#define MLX5E_DEFAULT_LRO_TIMEOUT 32 +#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 + #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 @@ -221,6 +224,7 @@ struct mlx5e_params { struct ieee_ets ets; #endif bool rx_am_enabled; + u32 lro_timeout; }; struct mlx5e_tstamp { @@ -888,5 +892,6 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); +u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 7eaf380..5c8ab3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1971,9 +1971,7 @@ static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) MLX5_SET(tirc, tirc, lro_max_ip_payload_size, (priv->params.lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); - MLX5_SET(tirc, tirc, lro_timeout_period_usecs, - MLX5_CAP_ETH(priv->mdev, - lro_timer_supported_periods[2])); + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, priv->params.lro_timeout); } void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv) @@ -3401,6 +3399,18 @@ static void mlx5e_query_min_inline(struct mlx5_core_dev *mdev, } } +u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) +{ + int i; + + /* The supported periods are organized in ascending order */ + for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++) + if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout) + break; + + return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); +} + static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, struct net_device *netdev, const struct mlx5e_profile *profile, @@ -3419,6 +3429,9 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->profile = profile; priv->ppriv = ppriv; + priv->params.lro_timeout = + mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); + priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; /* set CQE compression */ -- cgit v0.10.2 From 5e1e93c7047381b81236f82f60c15d49c510d1a7 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Tue, 25 Oct 2016 18:36:30 +0300 Subject: net/mlx5e: Unregister netdev before detaching it Detaching the netdev before unregistering it cause some netdev cleanup ndos to fail because they check presence of the netdev, so we need to unregister the netdev first. Fixes: 26e59d8077a3 ('net/mlx5e: Implement mlx5e interface attach/detach callbacks') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5c8ab3e..f4c687c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4048,7 +4048,6 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) const struct mlx5e_profile *profile = priv->profile; struct net_device *netdev = priv->netdev; - unregister_netdev(netdev); destroy_workqueue(priv->wq); if (profile->cleanup) profile->cleanup(priv); @@ -4065,6 +4064,7 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) for (vport = 1; vport < total_vfs; vport++) mlx5_eswitch_unregister_vport_rep(esw, vport); + unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); mlx5e_destroy_netdev(mdev, priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3c97da1..7fe6559 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -457,6 +457,7 @@ void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5e_priv *priv = rep->priv_data; struct net_device *netdev = priv->netdev; + unregister_netdev(netdev); mlx5e_detach_netdev(esw->dev, netdev); mlx5e_destroy_netdev(esw->dev, priv); } -- cgit v0.10.2 From 247f139cdae73b4f47bd348d05dff1afd40b84b6 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Tue, 25 Oct 2016 18:36:31 +0300 Subject: net/mlx5: Change the acl enable prototype to return status The Ingress/Egress ACL enable function may fail and it should return status to its caller to avoid NULL pointer dereference. Fixes: f942380c1239 ('net/mlx5: E-Switch, Vport ingress/egress ACLs rules for spoofchk') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index abbf2c3..be1f733 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -931,8 +931,8 @@ static void esw_vport_change_handler(struct work_struct *work) mutex_unlock(&esw->state_lock); } -static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *vlan_grp = NULL; @@ -949,9 +949,11 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, int table_size = 2; int err = 0; - if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support) || - !IS_ERR_OR_NULL(vport->egress.acl)) - return; + if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) + return -EOPNOTSUPP; + + if (!IS_ERR_OR_NULL(vport->egress.acl)) + return 0; esw_debug(dev, "Create vport[%d] egress ACL log_max_size(%d)\n", vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size)); @@ -959,12 +961,12 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch egress flow namespace\n"); - return; + return -EIO; } flow_group_in = mlx5_vzalloc(inlen); if (!flow_group_in) - return; + return -ENOMEM; acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); if (IS_ERR(acl)) { @@ -1009,6 +1011,7 @@ out: mlx5_destroy_flow_group(vlan_grp); if (err && !IS_ERR_OR_NULL(acl)) mlx5_destroy_flow_table(acl); + return err; } static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, @@ -1041,8 +1044,8 @@ static void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, vport->egress.acl = NULL; } -static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_core_dev *dev = esw->dev; @@ -1063,9 +1066,11 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, int table_size = 4; int err = 0; - if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support) || - !IS_ERR_OR_NULL(vport->ingress.acl)) - return; + if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) + return -EOPNOTSUPP; + + if (!IS_ERR_OR_NULL(vport->ingress.acl)) + return 0; esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n", vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); @@ -1073,12 +1078,12 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch ingress flow namespace\n"); - return; + return -EIO; } flow_group_in = mlx5_vzalloc(inlen); if (!flow_group_in) - return; + return -ENOMEM; acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); if (IS_ERR(acl)) { @@ -1167,6 +1172,7 @@ out: } kvfree(flow_group_in); + return err; } static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, @@ -1225,7 +1231,13 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, return 0; } - esw_vport_enable_ingress_acl(esw, vport); + err = esw_vport_enable_ingress_acl(esw, vport); + if (err) { + mlx5_core_warn(esw->dev, + "failed to enable ingress acl (%d) on vport[%d]\n", + err, vport->vport); + return err; + } esw_debug(esw->dev, "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", @@ -1299,7 +1311,13 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, return 0; } - esw_vport_enable_egress_acl(esw, vport); + err = esw_vport_enable_egress_acl(esw, vport); + if (err) { + mlx5_core_warn(esw->dev, + "failed to enable egress acl (%d) on vport[%d]\n", + err, vport->vport); + return err; + } esw_debug(esw->dev, "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", -- cgit v0.10.2 From 2241007b3d783cbdbaa78c30bdb1994278b6f9b9 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Tue, 25 Oct 2016 18:36:32 +0300 Subject: net/mlx5: Clear health sick bit when starting health poll The health sick status should be cleared when we start the health poll. This is crucial for driver reload (unload + load) in order to behave right in case of health issue. Fixes: fd76ee4da55a ('net/mlx5_core: Fix internal error detection conditions') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 1a05fb9..2cb4094 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -281,6 +281,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; init_timer(&health->timer); + health->sick = 0; health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; -- cgit v0.10.2 From 05ac2c0b7438ea08c5d54b48797acf9b22cb2f6f Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Tue, 25 Oct 2016 18:36:33 +0300 Subject: net/mlx5: Fix race between PCI error handlers and health work Currently there is a race between the health care work and the kernel pci error handlers because both of them detect the error, the first one to be called will do the error handling. There is a chance that health care will disable the pci after resuming pci slot. Also create a separate WQ because now we will have two types of health works, one for the error detection and one for the recovery. Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 2cb4094..2d00022 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -64,6 +64,10 @@ enum { MLX5_NIC_IFC_NO_DRAM_NIC = 2 }; +enum { + MLX5_DROP_NEW_HEALTH_WORK, +}; + static u8 get_nic_interface(struct mlx5_core_dev *dev) { return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; @@ -272,7 +276,13 @@ static void poll_health(unsigned long data) if (in_fatal(dev) && !health->sick) { health->sick = true; print_health_info(dev); - schedule_work(&health->work); + spin_lock(&health->wq_lock); + if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) + queue_work(health->wq, &health->work); + else + dev_err(&dev->pdev->dev, + "new health works are not permitted at this stage\n"); + spin_unlock(&health->wq_lock); } } @@ -282,6 +292,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) init_timer(&health->timer); health->sick = 0; + clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; @@ -298,11 +309,21 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev) del_timer_sync(&health->timer); } +void mlx5_drain_health_wq(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + spin_lock(&health->wq_lock); + set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); + spin_unlock(&health->wq_lock); + cancel_work_sync(&health->work); +} + void mlx5_health_cleanup(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - flush_work(&health->work); + destroy_workqueue(health->wq); } int mlx5_health_init(struct mlx5_core_dev *dev) @@ -317,8 +338,11 @@ int mlx5_health_init(struct mlx5_core_dev *dev) strcpy(name, "mlx5_health"); strcat(name, dev_name(&dev->pdev->dev)); + health->wq = create_singlethread_workqueue(name); kfree(name); - + if (!health->wq) + return -ENOMEM; + spin_lock_init(&health->wq_lock); INIT_WORK(&health->work, health_care); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 8a63910..9f90226 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1315,8 +1315,13 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, dev_info(&pdev->dev, "%s was called\n", __func__); mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); - pci_save_state(pdev); - mlx5_pci_disable_device(dev); + /* In case of kernel call save the pci state and drain health wq */ + if (state) { + pci_save_state(pdev); + mlx5_drain_health_wq(dev); + mlx5_pci_disable_device(dev); + } + return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5dbda60..7d9a5d0 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -418,7 +418,10 @@ struct mlx5_core_health { u32 prev; int miss_counter; bool sick; + /* wq spinlock to synchronize draining */ + spinlock_t wq_lock; struct workqueue_struct *wq; + unsigned long flags; struct work_struct work; }; @@ -778,6 +781,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev); +void mlx5_drain_health_wq(struct mlx5_core_dev *dev); int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf, int node); int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf); -- cgit v0.10.2 From 04c0c1ab38e95105d950db5b84e727637e149ce7 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Tue, 25 Oct 2016 18:36:34 +0300 Subject: net/mlx5: PCI error recovery health care simulation In case that the kernel PCI error handlers are not called, we will trigger our own recovery flow. The health work will give priority to the kernel pci error handlers to recover the PCI by waiting for a small period, if the pci error handlers are not triggered the manual recovery flow will be executed. We don't save pci state in case of manual recovery because it will ruin the pci configuration space and we will lose dma sync. Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 2d00022..5bcf934 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -61,14 +61,15 @@ enum { enum { MLX5_NIC_IFC_FULL = 0, MLX5_NIC_IFC_DISABLED = 1, - MLX5_NIC_IFC_NO_DRAM_NIC = 2 + MLX5_NIC_IFC_NO_DRAM_NIC = 2, + MLX5_NIC_IFC_INVALID = 3 }; enum { MLX5_DROP_NEW_HEALTH_WORK, }; -static u8 get_nic_interface(struct mlx5_core_dev *dev) +static u8 get_nic_state(struct mlx5_core_dev *dev) { return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; } @@ -101,7 +102,7 @@ static int in_fatal(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; struct health_buffer __iomem *h = health->health; - if (get_nic_interface(dev) == MLX5_NIC_IFC_DISABLED) + if (get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) return 1; if (ioread32be(&h->fw_ver) == 0xffffffff) @@ -131,7 +132,7 @@ unlock: static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) { - u8 nic_interface = get_nic_interface(dev); + u8 nic_interface = get_nic_state(dev); switch (nic_interface) { case MLX5_NIC_IFC_FULL: @@ -153,8 +154,34 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) mlx5_disable_device(dev); } +static void health_recover(struct work_struct *work) +{ + struct mlx5_core_health *health; + struct delayed_work *dwork; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + u8 nic_state; + + dwork = container_of(work, struct delayed_work, work); + health = container_of(dwork, struct mlx5_core_health, recover_work); + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + + nic_state = get_nic_state(dev); + if (nic_state == MLX5_NIC_IFC_INVALID) { + dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n"); + return; + } + + dev_err(&dev->pdev->dev, "starting health recovery flow\n"); + mlx5_recover_device(dev); +} + +/* How much time to wait until health resetting the driver (in msecs) */ +#define MLX5_RECOVERY_DELAY_MSECS 60000 static void health_care(struct work_struct *work) { + unsigned long recover_delay = msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS); struct mlx5_core_health *health; struct mlx5_core_dev *dev; struct mlx5_priv *priv; @@ -164,6 +191,14 @@ static void health_care(struct work_struct *work) dev = container_of(priv, struct mlx5_core_dev, priv); mlx5_core_warn(dev, "handling bad device here\n"); mlx5_handle_bad_state(dev); + + spin_lock(&health->wq_lock); + if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) + schedule_delayed_work(&health->recover_work, recover_delay); + else + dev_err(&dev->pdev->dev, + "new health works are not permitted at this stage\n"); + spin_unlock(&health->wq_lock); } static const char *hsynd_str(u8 synd) @@ -316,6 +351,7 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev) spin_lock(&health->wq_lock); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); spin_unlock(&health->wq_lock); + cancel_delayed_work_sync(&health->recover_work); cancel_work_sync(&health->work); } @@ -344,6 +380,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev) return -ENOMEM; spin_lock_init(&health->wq_lock); INIT_WORK(&health->work, health_care); + INIT_DELAYED_WORK(&health->recover_work, health_recover); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 9f90226..d5433c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1313,6 +1313,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, struct mlx5_priv *priv = &dev->priv; dev_info(&pdev->dev, "%s was called\n", __func__); + mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); /* In case of kernel call save the pci state and drain health wq */ @@ -1378,11 +1379,6 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) return PCI_ERS_RESULT_RECOVERED; } -void mlx5_disable_device(struct mlx5_core_dev *dev) -{ - mlx5_pci_err_detected(dev->pdev, 0); -} - static void mlx5_pci_resume(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); @@ -1432,6 +1428,18 @@ static const struct pci_device_id mlx5_core_pci_table[] = { MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); +void mlx5_disable_device(struct mlx5_core_dev *dev) +{ + mlx5_pci_err_detected(dev->pdev, 0); +} + +void mlx5_recover_device(struct mlx5_core_dev *dev) +{ + mlx5_pci_disable_device(dev); + if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED) + mlx5_pci_resume(dev->pdev); +} + static struct pci_driver mlx5_core_driver = { .name = DRIVER_NAME, .id_table = mlx5_core_pci_table, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 3d0cfb9..187662c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -83,6 +83,7 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); +void mlx5_recover_device(struct mlx5_core_dev *dev); int mlx5_sriov_init(struct mlx5_core_dev *dev); void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); int mlx5_sriov_attach(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7d9a5d0..ecc451d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -423,6 +423,7 @@ struct mlx5_core_health { struct workqueue_struct *wq; unsigned long flags; struct work_struct work; + struct delayed_work recover_work; }; struct mlx5_cq_table { -- cgit v0.10.2 From 6b276190c50a12511d889d9079ffb901ff94a822 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Tue, 25 Oct 2016 18:36:35 +0300 Subject: net/mlx5: Avoid passing dma address 0 to firmware Currently the firmware can't work with a page with dma address 0. Passing such an address to the firmware will cause the give_pages command to fail. To avoid this, in case we get a 0 dma address of a page from the dma engine, we avoid passing it to FW by remapping to get an address other than 0. Fixes: bf0bf77f6519 ('mlx5: Support communicating arbitrary host...') Signed-off-by: Noa Osherovich Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index cc4fd61..a57d5a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -209,6 +209,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr) static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id) { struct page *page; + u64 zero_addr = 1; u64 addr; int err; int nid = dev_to_node(&dev->pdev->dev); @@ -218,26 +219,35 @@ static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id) mlx5_core_warn(dev, "failed to allocate page\n"); return -ENOMEM; } +map: addr = dma_map_page(&dev->pdev->dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(&dev->pdev->dev, addr)) { mlx5_core_warn(dev, "failed dma mapping page\n"); err = -ENOMEM; - goto out_alloc; + goto err_mapping; } + + /* Firmware doesn't support page with physical address 0 */ + if (addr == 0) { + zero_addr = addr; + goto map; + } + err = insert_page(dev, addr, page, func_id); if (err) { mlx5_core_err(dev, "failed to track allocated page\n"); - goto out_mapping; + dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); } - return 0; - -out_mapping: - dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +err_mapping: + if (err) + __free_page(page); -out_alloc: - __free_page(page); + if (zero_addr == 0) + dma_unmap_page(&dev->pdev->dev, zero_addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); return err; } -- cgit v0.10.2 From e4cabca54911a6be6f2e80e48fa497c7e19019a5 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Tue, 25 Oct 2016 18:08:49 -0400 Subject: inet: Fix missing return value in inet6_hash As part of a series to implement faster SO_REUSEPORT lookups, commit 086c653f5862 ("sock: struct proto hash function may error") added return values to protocol hash functions and commit 496611d7b5ea ("inet: create IPv6-equivalent inet_hash function") implemented a new hash function for IPv6. However, the latter does not respect the former's convention. This properly propagates the hash errors in the IPv6 case. Fixes: 496611d7b5ea ("inet: create IPv6-equivalent inet_hash function") Reported-by: Soheil Hassas Yeganeh Signed-off-by: Craig Gallek Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 2fd0374..02761c9 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -264,13 +264,15 @@ EXPORT_SYMBOL_GPL(inet6_hash_connect); int inet6_hash(struct sock *sk) { + int err = 0; + if (sk->sk_state != TCP_CLOSE) { local_bh_disable(); - __inet_hash(sk, NULL, ipv6_rcv_saddr_equal); + err = __inet_hash(sk, NULL, ipv6_rcv_saddr_equal); local_bh_enable(); } - return 0; + return err; } EXPORT_SYMBOL_GPL(inet6_hash); -- cgit v0.10.2 From 96a8eb1eeed2c3485cdba198fab3a2faaec386d3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 26 Oct 2016 00:37:53 +0200 Subject: bpf: fix samples to add fake KBUILD_MODNAME Some of the sample files are causing issues when they are loaded with tc and cls_bpf, meaning tc bails out while trying to parse the resulting ELF file as program/map/etc sections are not present, which can be easily spotted with readelf(1). Currently, BPF samples are including some of the kernel headers and mid term we should change them to refrain from this, really. When dynamic debugging is enabled, we bail out due to undeclared KBUILD_MODNAME, which is easily overlooked in the build as clang spills this along with other noisy warnings from various header includes, and llc still generates an ELF file with mentioned characteristics. For just playing around with BPF examples, this can be a bit of a hurdle to take. Just add a fake KBUILD_MODNAME as a band-aid to fix the issue, same is done in xdp*_kern samples already. Fixes: 65d472fb007d ("samples/bpf: add 'pointer to packet' tests") Fixes: 6afb1e28b859 ("samples/bpf: Add tunnel set/get tests.") Fixes: a3f74617340b ("cgroup: bpf: Add an example to do cgroup checking in BPF") Reported-by: Chandrasekar Kannan Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller diff --git a/samples/bpf/parse_ldabs.c b/samples/bpf/parse_ldabs.c index d175501..6db6b21 100644 --- a/samples/bpf/parse_ldabs.c +++ b/samples/bpf/parse_ldabs.c @@ -4,6 +4,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include #include #include diff --git a/samples/bpf/parse_simple.c b/samples/bpf/parse_simple.c index cf2511c..10af53d 100644 --- a/samples/bpf/parse_simple.c +++ b/samples/bpf/parse_simple.c @@ -4,6 +4,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include #include #include diff --git a/samples/bpf/parse_varlen.c b/samples/bpf/parse_varlen.c index edab34d..95c1632 100644 --- a/samples/bpf/parse_varlen.c +++ b/samples/bpf/parse_varlen.c @@ -4,6 +4,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include #include #include diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c index fa051b3..274c884 100644 --- a/samples/bpf/tcbpf1_kern.c +++ b/samples/bpf/tcbpf1_kern.c @@ -1,3 +1,4 @@ +#define KBUILD_MODNAME "foo" #include #include #include diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c index 3303bb8..9c823a6 100644 --- a/samples/bpf/tcbpf2_kern.c +++ b/samples/bpf/tcbpf2_kern.c @@ -5,6 +5,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include #include #include diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c index 10ff734..1547b36 100644 --- a/samples/bpf/test_cgrp2_tc_kern.c +++ b/samples/bpf/test_cgrp2_tc_kern.c @@ -4,6 +4,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include #include #include -- cgit v0.10.2 From ae148b085876fa771d9ef2c05f85d4b4bf09ce0d Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Wed, 26 Oct 2016 10:11:09 +0800 Subject: ip6_tunnel: Update skb->protocol to ETH_P_IPV6 in ip6_tnl_xmit() This patch updates skb->protocol to ETH_P_IPV6 in ip6_tnl_xmit() when an IPv6 header is installed to a socket buffer. This is not a cosmetic change. Without updating this value, GSO packets transmitted through an ipip6 tunnel have the protocol of ETH_P_IP and skb_mac_gso_segment() will attempt to call gso_segment() for IPv4, which results in the packets being dropped. Fixes: b8921ca83eed ("ip4ip6: Support for GSO/GRO") Signed-off-by: Eli Cooper Signed-off-by: David S. Miller diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5692d6b..8778456 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1172,6 +1172,7 @@ route_lookup: if (err) return err; + skb->protocol = htons(ETH_P_IPV6); skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); -- cgit v0.10.2 From a4256bc9ec36159e84d710c7c44aaa244a6380a8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Oct 2016 18:16:20 +0200 Subject: IB/mlx4: avoid a -Wmaybe-uninitialize warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is an old warning about mlx4_SW2HW_EQ_wrapper on x86: ethernet/mellanox/mlx4/resource_tracker.c: In function ‘mlx4_SW2HW_EQ_wrapper’: ethernet/mellanox/mlx4/resource_tracker.c:3071:10: error: ‘eq’ may be used uninitialized in this function [-Werror=maybe-uninitialized] The problem here is that gcc won't track the state of the variable across a spin_unlock. Moving the assignment out of the lock is safe here and avoids the warning. Signed-off-by: Arnd Bergmann Reviewed-by: Yishai Hadas Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 84d7857..c548bea 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1605,13 +1605,14 @@ static int eq_res_start_move_to(struct mlx4_dev *dev, int slave, int index, r->com.from_state = r->com.state; r->com.to_state = state; r->com.state = RES_EQ_BUSY; - if (eq) - *eq = r; } } spin_unlock_irq(mlx4_tlock(dev)); + if (!err && eq) + *eq = r; + return err; } -- cgit v0.10.2 From 166e6045cacccb3046883a1a4c977f173fec5ccd Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 26 Oct 2016 13:26:38 +0530 Subject: cxgb4: Fix error handling in alloc_uld_rxqs(). Fix to release resources properly in error handling path of alloc_uld_rxqs(), This patch also removes unwanted arguments and avoids calling the same function twice. Fixes: 94cdb8bb993a (cxgb4: Add support for dynamic allocation of resources for ULD Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 0945fa4..2471ff4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -135,15 +135,17 @@ static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, } static int alloc_uld_rxqs(struct adapter *adap, - struct sge_uld_rxq_info *rxq_info, - unsigned int nq, unsigned int offset, bool lro) + struct sge_uld_rxq_info *rxq_info, bool lro) { struct sge *s = &adap->sge; - struct sge_ofld_rxq *q = rxq_info->uldrxq + offset; - unsigned short *ids = rxq_info->rspq_id + offset; - unsigned int per_chan = nq / adap->params.nports; + unsigned int nq = rxq_info->nrxq + rxq_info->nciq; + struct sge_ofld_rxq *q = rxq_info->uldrxq; + unsigned short *ids = rxq_info->rspq_id; unsigned int bmap_idx = 0; - int i, err, msi_idx; + unsigned int per_chan; + int i, err, msi_idx, que_idx = 0; + + per_chan = rxq_info->nrxq / adap->params.nports; if (adap->flags & USING_MSIX) msi_idx = 1; @@ -151,12 +153,18 @@ static int alloc_uld_rxqs(struct adapter *adap, msi_idx = -((int)s->intrq.abs_id + 1); for (i = 0; i < nq; i++, q++) { + if (i == rxq_info->nrxq) { + /* start allocation of concentrator queues */ + per_chan = rxq_info->nciq / adap->params.nports; + que_idx = 0; + } + if (msi_idx >= 0) { bmap_idx = get_msix_idx_from_bmap(adap); msi_idx = adap->msix_info_ulds[bmap_idx].idx; } err = t4_sge_alloc_rxq(adap, &q->rspq, false, - adap->port[i / per_chan], + adap->port[que_idx++ / per_chan], msi_idx, q->fl.size ? &q->fl : NULL, uldrx_handler, @@ -165,29 +173,19 @@ static int alloc_uld_rxqs(struct adapter *adap, if (err) goto freeout; if (msi_idx >= 0) - rxq_info->msix_tbl[i + offset] = bmap_idx; + rxq_info->msix_tbl[i] = bmap_idx; memset(&q->stats, 0, sizeof(q->stats)); if (ids) ids[i] = q->rspq.abs_id; } return 0; freeout: - q = rxq_info->uldrxq + offset; + q = rxq_info->uldrxq; for ( ; i; i--, q++) { if (q->rspq.desc) free_rspq_fl(adap, &q->rspq, q->fl.size ? &q->fl : NULL); } - - /* We need to free rxq also in case of ciq allocation failure */ - if (offset) { - q = rxq_info->uldrxq + offset; - for ( ; i; i--, q++) { - if (q->rspq.desc) - free_rspq_fl(adap, &q->rspq, - q->fl.size ? &q->fl : NULL); - } - } return err; } @@ -205,9 +203,7 @@ setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro) return -ENOMEM; } - ret = !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) && - !alloc_uld_rxqs(adap, rxq_info, rxq_info->nciq, - rxq_info->nrxq, lro)); + ret = !(!alloc_uld_rxqs(adap, rxq_info, lro)); /* Tell uP to route control queue completions to rdma rspq */ if (adap->flags & FULL_INIT_DONE && -- cgit v0.10.2 From 4700e9ce6ed8526a14e6cf9d7e319f675c16d0a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Oct 2016 14:44:33 +0200 Subject: net_sched actions: use nla_parse_nested() Use nla_parse_nested instead of open-coding the call to nla_parse() with the attribute data/len. Signed-off-by: Johannes Berg Acked-by: Cong Wang Signed-off-by: David S. Miller diff --git a/net/sched/act_api.c b/net/sched/act_api.c index a512b18..f893d18 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1028,8 +1028,7 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n) if (tb[1] == NULL) return NULL; - if (nla_parse(tb2, TCA_ACT_MAX, nla_data(tb[1]), - nla_len(tb[1]), NULL) < 0) + if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL) < 0) return NULL; kind = tb2[TCA_ACT_KIND]; -- cgit v0.10.2 From 104ba78c98808ae837d1f63aae58c183db5505df Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 26 Oct 2016 11:23:07 -0400 Subject: packet: on direct_xmit, limit tso and csum to supported devices When transmitting on a packet socket with PACKET_VNET_HDR and PACKET_QDISC_BYPASS, validate device support for features requested in vnet_hdr. Drop TSO packets sent to devices that do not support TSO or have the feature disabled. Note that the latter currently do process those packets correctly, regardless of not advertising the feature. Because of SKB_GSO_DODGY, it is not sufficient to test device features with netif_needs_gso. Full validate_xmit_skb is needed. Switch to software checksum for non-TSO packets that request checksum offload if that device feature is unsupported or disabled. Note that similar to the TSO case, device drivers may perform checksum offload correctly even when not advertising it. When switching to software checksum, packets hit skb_checksum_help, which has two BUG_ON checksum not in linear segment. Packet sockets always allocate at least up to csum_start + csum_off + 2 as linear. Tested by running github.com/wdebruij/kerneltools/psock_txring_vnet.c ethtool -K eth0 tso off tx on psock_txring_vnet -d $dst -s $src -i eth0 -l 2000 -n 1 -q -v psock_txring_vnet -d $dst -s $src -i eth0 -l 2000 -n 1 -q -v -N ethtool -K eth0 tx off psock_txring_vnet -d $dst -s $src -i eth0 -l 1000 -n 1 -q -v -G psock_txring_vnet -d $dst -s $src -i eth0 -l 1000 -n 1 -q -v -G -N v2: - add EXPORT_SYMBOL_GPL(validate_xmit_skb_list) Fixes: d346a3fae3ff ("packet: introduce PACKET_QDISC_BYPASS socket option") Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller diff --git a/net/core/dev.c b/net/core/dev.c index dbc8713..f745112 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3035,6 +3035,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d } return head; } +EXPORT_SYMBOL_GPL(validate_xmit_skb_list); static void qdisc_pkt_len_init(struct sk_buff *skb) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 11db0d6..d2238b2 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -250,7 +250,7 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static int packet_direct_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; - netdev_features_t features; + struct sk_buff *orig_skb = skb; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; @@ -258,9 +258,8 @@ static int packet_direct_xmit(struct sk_buff *skb) !netif_carrier_ok(dev))) goto drop; - features = netif_skb_features(skb); - if (skb_needs_linearize(skb, features) && - __skb_linearize(skb)) + skb = validate_xmit_skb_list(skb, dev); + if (skb != orig_skb) goto drop; txq = skb_get_tx_queue(dev, skb); @@ -280,7 +279,7 @@ static int packet_direct_xmit(struct sk_buff *skb) return ret; drop: atomic_long_inc(&dev->tx_dropped); - kfree_skb(skb); + kfree_skb_list(skb); return NET_XMIT_DROP; } -- cgit v0.10.2 From e934f684856393a0b2aecc7fdd8357a48b79c535 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 26 Oct 2016 08:30:29 -0700 Subject: Revert "hv_netvsc: report vmbus name in ethtool" This reverts commit e3f74b841d48 ("hv_netvsc: report vmbus name in ethtool")' because of problem introduced by commit f9a56e5d6a0ba ("Drivers: hv: make VMBus bus ids persistent"). This changed the format of the vmbus name and this new format is too long to fit in the bus_info field of ethtool. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c71d966..f638215 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -699,12 +699,8 @@ int netvsc_recv_callback(struct hv_device *device_obj, static void netvsc_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) { - struct net_device_context *net_device_ctx = netdev_priv(net); - struct hv_device *dev = net_device_ctx->device_ctx; - strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); - strlcpy(info->bus_info, vmbus_dev_name(dev), sizeof(info->bus_info)); } static void netvsc_get_channels(struct net_device *net, diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 6824556..cd184bd 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1169,13 +1169,6 @@ int __must_check __vmbus_driver_register(struct hv_driver *hv_driver, const char *mod_name); void vmbus_driver_unregister(struct hv_driver *hv_driver); -static inline const char *vmbus_dev_name(const struct hv_device *device_obj) -{ - const struct kobject *kobj = &device_obj->device.kobj; - - return kobj->name; -} - void vmbus_hvsock_device_unregister(struct vmbus_channel *channel); int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, -- cgit v0.10.2 From fd33b2447bec7926ceaf4a4b74b68ea2466f2ae0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 26 Oct 2016 11:47:02 -0600 Subject: net: mv643xx_eth: Fetch the phy connection type from DT The MAC is capable of RGMII mode and that is probably a more typical connection type than GMII today (eg it is used by Marvell Reference designs for several SOCs). Let DT users specify the standard phy-connection-type = "rgmii-id"; On a phy node. Signed-off-by: Jason Gunthorpe Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/Documentation/devicetree/bindings/net/marvell-orion-net.txt b/Documentation/devicetree/bindings/net/marvell-orion-net.txt index bce52b2..6fd988c 100644 --- a/Documentation/devicetree/bindings/net/marvell-orion-net.txt +++ b/Documentation/devicetree/bindings/net/marvell-orion-net.txt @@ -49,6 +49,7 @@ Optional port properties: and - phy-handle: See ethernet.txt file in the same directory. + - phy-mode: See ethernet.txt file in the same directory. or diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 5583118..bf5cc55b 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2968,6 +2968,22 @@ static void set_params(struct mv643xx_eth_private *mp, mp->txq_count = pd->tx_queue_count ? : 1; } +static int get_phy_mode(struct mv643xx_eth_private *mp) +{ + struct device *dev = mp->dev->dev.parent; + int iface = -1; + + if (dev->of_node) + iface = of_get_phy_mode(dev->of_node); + + /* Historical default if unspecified. We could also read/write + * the interface state in the PSC1 + */ + if (iface < 0) + iface = PHY_INTERFACE_MODE_GMII; + return iface; +} + static struct phy_device *phy_scan(struct mv643xx_eth_private *mp, int phy_addr) { @@ -2994,7 +3010,7 @@ static struct phy_device *phy_scan(struct mv643xx_eth_private *mp, "orion-mdio-mii", addr); phydev = phy_connect(mp->dev, phy_id, mv643xx_eth_adjust_link, - PHY_INTERFACE_MODE_GMII); + get_phy_mode(mp)); if (!IS_ERR(phydev)) { phy_addr_set(mp, addr); break; @@ -3090,6 +3106,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev) if (!dev) return -ENOMEM; + SET_NETDEV_DEV(dev, &pdev->dev); mp = netdev_priv(dev); platform_set_drvdata(pdev, mp); @@ -3129,7 +3146,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev) if (pd->phy_node) { mp->phy = of_phy_connect(mp->dev, pd->phy_node, mv643xx_eth_adjust_link, 0, - PHY_INTERFACE_MODE_GMII); + get_phy_mode(mp)); if (!mp->phy) err = -ENODEV; else @@ -3187,8 +3204,6 @@ static int mv643xx_eth_probe(struct platform_device *pdev) dev->priv_flags |= IFF_UNICAST_FLT; dev->gso_max_segs = MV643XX_MAX_TSO_SEGS; - SET_NETDEV_DEV(dev, &pdev->dev); - if (mp->shared->win_protect) wrl(mp, WINDOW_PROTECT(mp->port_num), mp->shared->win_protect); -- cgit v0.10.2 From 8d7533e5aaad1c94386a8101a36b0617987966b7 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 26 Oct 2016 13:57:38 -0500 Subject: ibmvnic: Fix releasing of sub-CRQ IRQs in interrupt context Schedule these XPORT event tasks in the shared workqueue so that IRQs are not freed in an interrupt context when sub-CRQs are released. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 213162d..0459d19 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3232,6 +3232,27 @@ static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter) spin_unlock_irqrestore(&adapter->inflight_lock, flags); } +static void ibmvnic_xport_event(struct work_struct *work) +{ + struct ibmvnic_adapter *adapter = container_of(work, + struct ibmvnic_adapter, + ibmvnic_xport); + struct device *dev = &adapter->vdev->dev; + int rc; + + ibmvnic_free_inflight(adapter); + release_sub_crqs(adapter); + if (adapter->migrated) { + rc = ibmvnic_reenable_crq_queue(adapter); + if (rc) + dev_err(dev, "Error after enable rc=%ld\n", rc); + adapter->migrated = false; + rc = ibmvnic_send_crq_init(adapter); + if (rc) + dev_err(dev, "Error sending init rc=%ld\n", rc); + } +} + static void ibmvnic_handle_crq(union ibmvnic_crq *crq, struct ibmvnic_adapter *adapter) { @@ -3267,15 +3288,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) { dev_info(dev, "Re-enabling adapter\n"); adapter->migrated = true; - ibmvnic_free_inflight(adapter); - release_sub_crqs(adapter); - rc = ibmvnic_reenable_crq_queue(adapter); - if (rc) - dev_err(dev, "Error after enable rc=%ld\n", rc); - adapter->migrated = false; - rc = ibmvnic_send_crq_init(adapter); - if (rc) - dev_err(dev, "Error sending init rc=%ld\n", rc); + schedule_work(&adapter->ibmvnic_xport); } else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) { dev_info(dev, "Backing device failover detected\n"); netif_carrier_off(netdev); @@ -3284,8 +3297,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, /* The adapter lost the connection */ dev_err(dev, "Virtual Adapter failed (rc=%d)\n", gen_crq->cmd); - ibmvnic_free_inflight(adapter); - release_sub_crqs(adapter); + schedule_work(&adapter->ibmvnic_xport); } return; case IBMVNIC_CRQ_CMD_RSP: @@ -3726,6 +3738,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) SET_NETDEV_DEV(netdev, &dev->dev); INIT_WORK(&adapter->vnic_crq_init, handle_crq_init_rsp); + INIT_WORK(&adapter->ibmvnic_xport, ibmvnic_xport_event); spin_lock_init(&adapter->stats_lock); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 878e2c0..dd775d9 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -1048,5 +1048,6 @@ struct ibmvnic_adapter { u8 map_id; struct work_struct vnic_crq_init; + struct work_struct ibmvnic_xport; bool failover; }; -- cgit v0.10.2 From ff57087f314be8d458dca6bdd41be5b4236482a0 Mon Sep 17 00:00:00 2001 From: shamir rabinovitch Date: Thu, 27 Oct 2016 05:46:38 -0400 Subject: rds: debug messages are enabled by default rds use Kconfig option called "RDS_DEBUG" to enable rds debug messages. This option cause the rds Makefile to add -DDEBUG to the rds gcc command line. When CONFIG_DYNAMIC_DEBUG is enabled, the "DEBUG" macro is used by include/linux/dynamic_debug.h to decide if dynamic debug prints should be sent by default to the kernel log. rds should not enable this macro for production builds. rds dynamic debug work as expected follow this fix. Signed-off-by: Shamir Rabinovitch Acked-by: Santosh Shilimkar Reviewed-by: Wengang Wang Signed-off-by: David S. Miller diff --git a/net/rds/Makefile b/net/rds/Makefile index 0e72bec..56c7d27 100644 --- a/net/rds/Makefile +++ b/net/rds/Makefile @@ -13,5 +13,5 @@ obj-$(CONFIG_RDS_TCP) += rds_tcp.o rds_tcp-y := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \ tcp_send.o tcp_stats.o -ccflags-$(CONFIG_RDS_DEBUG) := -DDEBUG +ccflags-$(CONFIG_RDS_DEBUG) := -DRDS_DEBUG diff --git a/net/rds/rds.h b/net/rds/rds.h index fd0bccb..67ba67c 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -33,7 +33,7 @@ #define KERNEL_HAS_ATOMIC64 #endif -#ifdef DEBUG +#ifdef RDS_DEBUG #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) #else /* sigh, pr_debug() causes unused variable warnings */ -- cgit v0.10.2 From aa0c08feae8161b945520ada753d0dfe62b14fe7 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 27 Oct 2016 16:27:13 +0300 Subject: net/mlx4_core: Fix the resource-type enum in res tracker to conform to FW spec The resource type enum in the resource tracker was incorrect. RES_EQ was put in the position of RES_NPORT_ID (a FC resource). Since the remaining resources maintain their current values, and RES_EQ is not passed from slaves to the hypervisor in any FW command, this change affects only the hypervisor. Therefore, there is no backwards-compatibility issue. Fixes: 623ed84b1f95 ("mlx4_core: initial header-file changes for SRIOV support") Signed-off-by: Jack Morgenstein Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index e4878f3..7aad076 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -145,9 +145,10 @@ enum mlx4_resource { RES_MTT, RES_MAC, RES_VLAN, - RES_EQ, + RES_NPORT_ID, RES_COUNTER, RES_FS_RULE, + RES_EQ, MLX4_NUM_OF_RESOURCE_TYPE }; -- cgit v0.10.2 From 33a1f8b196dca933313c001866c4df3f3ca11f78 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 27 Oct 2016 16:27:14 +0300 Subject: net/mlx4_core: Avoid setting ports to auto when only one port type is supported When only one port type is supported, it should be read only. We reject changing requests, even to the auto sense mode. Fixes: 27bf91d6a0d5 ("mlx4_core: Add link type autosensing") Signed-off-by: Maor Gottlieb Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 7183ac4..6f4e67b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1102,6 +1102,14 @@ static int __set_port_type(struct mlx4_port_info *info, int i; int err = 0; + if ((port_type & mdev->caps.supported_type[info->port]) != port_type) { + mlx4_err(mdev, + "Requested port type for port %d is not supported on this HCA\n", + info->port); + err = -EINVAL; + goto err_sup; + } + mlx4_stop_sense(mdev); mutex_lock(&priv->port_mutex); info->tmp_type = port_type; @@ -1147,7 +1155,7 @@ static int __set_port_type(struct mlx4_port_info *info, out: mlx4_start_sense(mdev); mutex_unlock(&priv->port_mutex); - +err_sup: return err; } -- cgit v0.10.2 From 72da2e911f79d4c7132d7431a97d46659ee862be Mon Sep 17 00:00:00 2001 From: Moshe Lazer Date: Thu, 27 Oct 2016 16:27:15 +0300 Subject: net/mlx4_core: Change the default value of enable_qos Change the default status of quality of service back to disabled, as it hurts performance in some cases. Fixes: 38438f7c7e8c ("net/mlx4: Set enhanced QoS support by default when ...") Signed-off-by: Moshe Lazer Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index c41ab31..84bab9f0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -49,9 +49,9 @@ enum { extern void __buggy_use_of_MLX4_GET(void); extern void __buggy_use_of_MLX4_PUT(void); -static bool enable_qos = true; +static bool enable_qos; module_param(enable_qos, bool, 0444); -MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: on)"); +MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: off)"); #define MLX4_GET(dest, source, offset) \ do { \ -- cgit v0.10.2 From 4850cf4581578216468b7b3c3d06cc5abb0a697d Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 27 Oct 2016 16:27:16 +0300 Subject: net/mlx4_en: Resolve dividing by zero in 32-bit system When doing roundup_pow_of_two for large enough number with bit 31, an overflow will occur and a value equal to 1 will be returned. In this case 1 will be subtracted from the return value and division by zero will be reached. Fixes: 31c128b66e5b ("net/mlx4_en: Choose time-stamping shift value according to HW frequency") Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 08fc5fc..a5fc46b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -245,8 +245,11 @@ static u32 freq_to_shift(u16 freq) { u32 freq_khz = freq * 1000; u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; + u64 tmp_rounded = + roundup_pow_of_two(max_val_cycles) > max_val_cycles ? + roundup_pow_of_two(max_val_cycles) - 1 : UINT_MAX; u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? - max_val_cycles : roundup_pow_of_two(max_val_cycles) - 1; + max_val_cycles : tmp_rounded; /* calculate max possible multiplier in order to fit in 64bit */ u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); -- cgit v0.10.2 From 8d59de8f7bb3db296331c665779c653b0c8d13ba Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 27 Oct 2016 16:27:17 +0300 Subject: net/mlx4_en: Process all completions in RX rings after port goes up Currently there is a race between incoming traffic and initialization flow. HW is able to receive the packets after INIT_PORT is done and unicast steering is configured. Before we set priv->port_up NAPI is not scheduled and receive queues become full. Therefore we never get new interrupts about the completions. This issue could happen if running heavy traffic during bringing port up. The resolution is to schedule NAPI once port_up is set. If receive queues were full this will process all cqes and release them. Fixes: c27a02cd94d6 ("mlx4_en: Add driver for Mellanox ConnectX 10GbE NIC") Signed-off-by: Erez Shitrit Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 7e703be..e25c11d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1733,6 +1733,13 @@ int mlx4_en_start_port(struct net_device *dev) udp_tunnel_get_rx_info(dev); priv->port_up = true; + + /* Process all completions if exist to prevent + * the queues freezing if they are full + */ + for (i = 0; i < priv->rx_ring_num; i++) + napi_schedule(&priv->rx_cq[i]->napi); + netif_tx_start_all_queues(dev); netif_device_attach(dev); -- cgit v0.10.2 From 9d2afba058722d40cc02f430229c91611c0e8d16 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 27 Oct 2016 16:27:18 +0300 Subject: net/mlx4_en: Fix panic during reboot Fix a kernel panic that occurs as a result of an asynchronous event handled in roce_gid_mgmt: mlx4_en_get_drvinfo is called and accesses freed resources. This happens in a shutdown flow only, since pci device is destroyed while netdevice is still alive. Fixes: c27a02cd94d6 ("mlx4_en: Add driver for Mellanox ConnectX 10GbE NIC") Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index e25c11d..314f54c8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2201,6 +2201,7 @@ void mlx4_en_destroy_netdev(struct net_device *dev) if (!shutdown) free_netdev(dev); + dev->ethtool_ops = NULL; } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) -- cgit v0.10.2 From 81d184199e328fdad5633da139a10337327154e0 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 27 Oct 2016 16:27:19 +0300 Subject: net/mlx4_core: Do not access comm channel if it has not yet been initialized In the Hypervisor, there are several FW commands which are invoked before the comm channel is initialized (in mlx4_multi_func_init). These include MOD_STAT_CONFIG, QUERY_DEV_CAP, INIT_HCA, and others. If any of these commands fails, say with a timeout, the Hypervisor driver enters the internal error reset flow. In this flow, the driver attempts to notify all slaves via the comm channel that an internal error has occurred. Since the comm channel has not yet been initialized (i.e., mapped via ioremap), this will cause dereferencing a NULL pointer. To fix this, do not access the comm channel in the internal error flow if it has not yet been initialized. Fixes: 55ad359225b2 ("net/mlx4_core: Enable device recovery flow with SRIOV") Fixes: ab9c17a009ee ("mlx4_core: Modify driver initialization flow to accommodate SRIOV for Ethernet") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index b1cef7a..e36bebc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2469,6 +2469,7 @@ err_comm_admin: kfree(priv->mfunc.master.slave_state); err_comm: iounmap(priv->mfunc.comm); + priv->mfunc.comm = NULL; err_vhcr: dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, priv->mfunc.vhcr, @@ -2537,6 +2538,13 @@ void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev) int slave; u32 slave_read; + /* If the comm channel has not yet been initialized, + * skip reporting the internal error event to all + * the communication channels. + */ + if (!priv->mfunc.comm) + return; + /* Report an internal error event to all * communication channels. */ @@ -2571,6 +2579,7 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev) } iounmap(priv->mfunc.comm); + priv->mfunc.comm = NULL; } void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask) -- cgit v0.10.2 From 6f2e0d2c3bf0f8d322ab7516c57340c7189cca02 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 27 Oct 2016 16:27:20 +0300 Subject: net/mlx4: Fix firmware command timeout during interrupt test Currently interrupt test that is part of ethtool selftest runs the check over all interrupt vectors of the device. In mlx4_en package part of interrupt vectors are uninitialized since mlx4_ib doesn't exist. This causes NOP FW command to time out. Change logic to test current port interrupt vectors only. Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c index b66e03d..c06346a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c @@ -118,6 +118,29 @@ mlx4_en_test_loopback_exit: return !loopback_ok; } +static int mlx4_en_test_interrupts(struct mlx4_en_priv *priv) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int err = 0; + int i = 0; + + err = mlx4_test_async(mdev->dev); + /* When not in MSI_X or slave, test only async */ + if (!(mdev->dev->flags & MLX4_FLAG_MSI_X) || mlx4_is_slave(mdev->dev)) + return err; + + /* A loop over all completion vectors of current port, + * for each vector check whether it works by mapping command + * completions to that vector and performing a NOP command + */ + for (i = 0; i < priv->rx_ring_num; i++) { + err = mlx4_test_interrupt(mdev->dev, priv->rx_cq[i]->vector); + if (err) + break; + } + + return err; +} static int mlx4_en_test_link(struct mlx4_en_priv *priv) { @@ -151,7 +174,6 @@ static int mlx4_en_test_speed(struct mlx4_en_priv *priv) void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf) { struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_dev *mdev = priv->mdev; int i, carrier_ok; memset(buf, 0, sizeof(u64) * MLX4_EN_NUM_SELF_TEST); @@ -177,7 +199,7 @@ void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf) netif_carrier_on(dev); } - buf[0] = mlx4_test_interrupts(mdev->dev); + buf[0] = mlx4_en_test_interrupts(priv); buf[1] = mlx4_en_test_link(priv); buf[2] = mlx4_en_test_speed(priv); diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index cf8f8a7..cd3638e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1361,53 +1361,49 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) kfree(priv->eq_table.uar_map); } -/* A test that verifies that we can accept interrupts on all - * the irq vectors of the device. +/* A test that verifies that we can accept interrupts + * on the vector allocated for asynchronous events + */ +int mlx4_test_async(struct mlx4_dev *dev) +{ + return mlx4_NOP(dev); +} +EXPORT_SYMBOL(mlx4_test_async); + +/* A test that verifies that we can accept interrupts + * on the given irq vector of the tested port. * Interrupts are checked using the NOP command. */ -int mlx4_test_interrupts(struct mlx4_dev *dev) +int mlx4_test_interrupt(struct mlx4_dev *dev, int vector) { struct mlx4_priv *priv = mlx4_priv(dev); - int i; int err; - err = mlx4_NOP(dev); - /* When not in MSI_X, there is only one irq to check */ - if (!(dev->flags & MLX4_FLAG_MSI_X) || mlx4_is_slave(dev)) - return err; - - /* A loop over all completion vectors, for each vector we will check - * whether it works by mapping command completions to that vector - * and performing a NOP command - */ - for(i = 0; !err && (i < dev->caps.num_comp_vectors); ++i) { - /* Make sure request_irq was called */ - if (!priv->eq_table.eq[i].have_irq) - continue; - - /* Temporary use polling for command completions */ - mlx4_cmd_use_polling(dev); - - /* Map the new eq to handle all asynchronous events */ - err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, - priv->eq_table.eq[i].eqn); - if (err) { - mlx4_warn(dev, "Failed mapping eq for interrupt test\n"); - mlx4_cmd_use_events(dev); - break; - } + /* Temporary use polling for command completions */ + mlx4_cmd_use_polling(dev); - /* Go back to using events */ - mlx4_cmd_use_events(dev); - err = mlx4_NOP(dev); + /* Map the new eq to handle all asynchronous events */ + err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, + priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn); + if (err) { + mlx4_warn(dev, "Failed mapping eq for interrupt test\n"); + goto out; } + /* Go back to using events */ + mlx4_cmd_use_events(dev); + err = mlx4_NOP(dev); + /* Return to default */ + mlx4_cmd_use_polling(dev); +out: mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); + mlx4_cmd_use_events(dev); + return err; } -EXPORT_SYMBOL(mlx4_test_interrupts); +EXPORT_SYMBOL(mlx4_test_interrupt); bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector) { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index f6a16429..3be7abd 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1399,7 +1399,8 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u32 *lkey, u32 *rkey); int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); -int mlx4_test_interrupts(struct mlx4_dev *dev); +int mlx4_test_interrupt(struct mlx4_dev *dev, int vector); +int mlx4_test_async(struct mlx4_dev *dev); int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier, const u32 offset[], u32 value[], size_t array_len, u8 port); -- cgit v0.10.2 From d2582a03939ed0a80ffcd3ea5345505bc8067c54 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 27 Oct 2016 16:27:21 +0300 Subject: net/mlx4_en: Fix potential deadlock in port statistics flow mlx4_en_DUMP_ETH_STATS took the *counter mutex* and then called the FW command, with WRAPPED attribute. As a result, the fw command is wrapped on the Hypervisor when it calls mlx4_en_DUMP_ETH_STATS. The FW command wrapper flow on the hypervisor takes the *slave_cmd_mutex* during processing. At the same time, a VF could be in the process of coming up, and could call mlx4_QUERY_FUNC_CAP. On the hypervisor, the command flow takes the *slave_cmd_mutex*, then executes mlx4_QUERY_FUNC_CAP_wrapper. mlx4_QUERY_FUNC_CAP wrapper calls mlx4_get_default_counter_index(), which takes the *counter mutex*. DEADLOCK. The fix is that the DUMP_ETH_STATS fw command should be called with the NATIVE attribute, so that on the hypervisor, this command does not enter the wrapper flow. Since the Hypervisor no longer goes through the wrapper code, we also simply return 0 in mlx4_DUMP_ETH_STATS_wrapper (i.e.the function succeeds, but the returned data will be all zeroes). No need to test if it is the Hypervisor going through the wrapper. Fixes: f9baff509f8a ("mlx4_core: Add "native" argument to mlx4_cmd ...") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 5aa8b75..59473a0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -166,7 +166,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) return PTR_ERR(mailbox); err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0, MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (err) goto out; @@ -322,7 +322,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL, 0, MLX4_CMD_DUMP_ETH_STATS, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (err) goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 7aad076..88ee7d8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1330,8 +1330,6 @@ int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd); int mlx4_common_set_vlan_fltr(struct mlx4_dev *dev, int function, int port, void *buf); -int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave, u32 in_mod, - struct mlx4_cmd_mailbox *outbox); int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index c5b2064..b656dd5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -1728,24 +1728,13 @@ int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave, return err; } -int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave, - u32 in_mod, struct mlx4_cmd_mailbox *outbox) -{ - return mlx4_cmd_box(dev, 0, outbox->dma, in_mod, 0, - MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B, - MLX4_CMD_NATIVE); -} - int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { - if (slave != dev->caps.function) - return 0; - return mlx4_common_dump_eth_stats(dev, slave, - vhcr->in_modifier, outbox); + return 0; } int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, -- cgit v0.10.2 From eb4b678825992aa434d32b2f615d2090281e0f88 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 27 Oct 2016 16:27:22 +0300 Subject: net/mlx4_en: Save slave ethtool stats command Following the previous patch, as an optimization, the slave will not even bother sending the DUMP_ETH_STATS command over the comm channel. Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 314f54c8..12c99a2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1917,8 +1917,9 @@ static void mlx4_en_clear_stats(struct net_device *dev) struct mlx4_en_dev *mdev = priv->mdev; int i; - if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) - en_dbg(HW, priv, "Failed dumping statistics\n"); + if (!mlx4_is_slave(mdev->dev)) + if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) + en_dbg(HW, priv, "Failed dumping statistics\n"); memset(&priv->pstats, 0, sizeof(priv->pstats)); memset(&priv->pkstats, 0, sizeof(priv->pkstats)); -- cgit v0.10.2 From f9d4286b9516b02e795214412d36885f572b57ad Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 27 Oct 2016 16:30:06 +0200 Subject: arch/powerpc: Update parameters for csum_tcpudp_magic & csum_tcpudp_nofold Commit 01cfbad "ipv4: Update parameters for csum_tcpudp_magic to their original types" changed parameters for csum_tcpudp_magic and csum_tcpudp_nofold for many platforms but not for PowerPC. Fixes: 01cfbad "ipv4: Update parameters for csum_tcpudp_magic to their original types" Cc: Alexander Duyck Signed-off-by: Ivan Vecera Acked-by: Alexander Duyck Signed-off-by: David S. Miller diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index ee655ed..1e8fceb 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -53,10 +53,8 @@ static inline __sum16 csum_fold(__wsum sum) return (__force __sum16)(~((__force u32)sum + tmp) >> 16); } -static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { #ifdef __powerpc64__ unsigned long s = (__force u32)sum; @@ -83,10 +81,8 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, * computes the checksum of the TCP/UDP pseudo-header * returns a 16-bit checksum, already complemented */ -static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); } -- cgit v0.10.2 From dbc34e73c2bee4ff66c3a6b0ea5d65c25a6b6994 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 29 Oct 2016 17:18:17 -0400 Subject: Revert "ibmvnic: Fix releasing of sub-CRQ IRQs in interrupt context" This reverts commit 8d7533e5aaad1c94386a8101a36b0617987966b7. It introduced kbuild failures, new version coming. Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 0459d19..213162d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3232,27 +3232,6 @@ static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter) spin_unlock_irqrestore(&adapter->inflight_lock, flags); } -static void ibmvnic_xport_event(struct work_struct *work) -{ - struct ibmvnic_adapter *adapter = container_of(work, - struct ibmvnic_adapter, - ibmvnic_xport); - struct device *dev = &adapter->vdev->dev; - int rc; - - ibmvnic_free_inflight(adapter); - release_sub_crqs(adapter); - if (adapter->migrated) { - rc = ibmvnic_reenable_crq_queue(adapter); - if (rc) - dev_err(dev, "Error after enable rc=%ld\n", rc); - adapter->migrated = false; - rc = ibmvnic_send_crq_init(adapter); - if (rc) - dev_err(dev, "Error sending init rc=%ld\n", rc); - } -} - static void ibmvnic_handle_crq(union ibmvnic_crq *crq, struct ibmvnic_adapter *adapter) { @@ -3288,7 +3267,15 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) { dev_info(dev, "Re-enabling adapter\n"); adapter->migrated = true; - schedule_work(&adapter->ibmvnic_xport); + ibmvnic_free_inflight(adapter); + release_sub_crqs(adapter); + rc = ibmvnic_reenable_crq_queue(adapter); + if (rc) + dev_err(dev, "Error after enable rc=%ld\n", rc); + adapter->migrated = false; + rc = ibmvnic_send_crq_init(adapter); + if (rc) + dev_err(dev, "Error sending init rc=%ld\n", rc); } else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) { dev_info(dev, "Backing device failover detected\n"); netif_carrier_off(netdev); @@ -3297,7 +3284,8 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, /* The adapter lost the connection */ dev_err(dev, "Virtual Adapter failed (rc=%d)\n", gen_crq->cmd); - schedule_work(&adapter->ibmvnic_xport); + ibmvnic_free_inflight(adapter); + release_sub_crqs(adapter); } return; case IBMVNIC_CRQ_CMD_RSP: @@ -3738,7 +3726,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) SET_NETDEV_DEV(netdev, &dev->dev); INIT_WORK(&adapter->vnic_crq_init, handle_crq_init_rsp); - INIT_WORK(&adapter->ibmvnic_xport, ibmvnic_xport_event); spin_lock_init(&adapter->stats_lock); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index dd775d9..878e2c0 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -1048,6 +1048,5 @@ struct ibmvnic_adapter { u8 map_id; struct work_struct vnic_crq_init; - struct work_struct ibmvnic_xport; bool failover; }; -- cgit v0.10.2 From 9888d7b02c7793cbbcbdd05dd9e14cc0e78d1db7 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 27 Oct 2016 12:28:51 -0500 Subject: ibmvnic: Fix releasing of sub-CRQ IRQs in interrupt context Schedule these XPORT event tasks in the shared workqueue so that IRQs are not freed in an interrupt context when sub-CRQs are released. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 213162d..a922fb9 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3232,6 +3232,27 @@ static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter) spin_unlock_irqrestore(&adapter->inflight_lock, flags); } +static void ibmvnic_xport_event(struct work_struct *work) +{ + struct ibmvnic_adapter *adapter = container_of(work, + struct ibmvnic_adapter, + ibmvnic_xport); + struct device *dev = &adapter->vdev->dev; + long rc; + + ibmvnic_free_inflight(adapter); + release_sub_crqs(adapter); + if (adapter->migrated) { + rc = ibmvnic_reenable_crq_queue(adapter); + if (rc) + dev_err(dev, "Error after enable rc=%ld\n", rc); + adapter->migrated = false; + rc = ibmvnic_send_crq_init(adapter); + if (rc) + dev_err(dev, "Error sending init rc=%ld\n", rc); + } +} + static void ibmvnic_handle_crq(union ibmvnic_crq *crq, struct ibmvnic_adapter *adapter) { @@ -3267,15 +3288,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) { dev_info(dev, "Re-enabling adapter\n"); adapter->migrated = true; - ibmvnic_free_inflight(adapter); - release_sub_crqs(adapter); - rc = ibmvnic_reenable_crq_queue(adapter); - if (rc) - dev_err(dev, "Error after enable rc=%ld\n", rc); - adapter->migrated = false; - rc = ibmvnic_send_crq_init(adapter); - if (rc) - dev_err(dev, "Error sending init rc=%ld\n", rc); + schedule_work(&adapter->ibmvnic_xport); } else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) { dev_info(dev, "Backing device failover detected\n"); netif_carrier_off(netdev); @@ -3284,8 +3297,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, /* The adapter lost the connection */ dev_err(dev, "Virtual Adapter failed (rc=%d)\n", gen_crq->cmd); - ibmvnic_free_inflight(adapter); - release_sub_crqs(adapter); + schedule_work(&adapter->ibmvnic_xport); } return; case IBMVNIC_CRQ_CMD_RSP: @@ -3726,6 +3738,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) SET_NETDEV_DEV(netdev, &dev->dev); INIT_WORK(&adapter->vnic_crq_init, handle_crq_init_rsp); + INIT_WORK(&adapter->ibmvnic_xport, ibmvnic_xport_event); spin_lock_init(&adapter->stats_lock); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 878e2c0..dd775d9 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -1048,5 +1048,6 @@ struct ibmvnic_adapter { u8 map_id; struct work_struct vnic_crq_init; + struct work_struct ibmvnic_xport; bool failover; }; -- cgit v0.10.2 From 8bf371e6adff29758cc3c57c17df4486513081f8 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 27 Oct 2016 12:28:52 -0500 Subject: ibmvnic: Fix missing brackets in init_sub_crq_irqs Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index a922fb9..5f44c55 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1461,14 +1461,16 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter) return rc; req_rx_irq_failed: - for (j = 0; j < i; j++) + for (j = 0; j < i; j++) { free_irq(adapter->rx_scrq[j]->irq, adapter->rx_scrq[j]); irq_dispose_mapping(adapter->rx_scrq[j]->irq); + } i = adapter->req_tx_queues; req_tx_irq_failed: - for (j = 0; j < i; j++) + for (j = 0; j < i; j++) { free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]); irq_dispose_mapping(adapter->rx_scrq[j]->irq); + } release_sub_crqs_no_irqs(adapter); return rc; } -- cgit v0.10.2 From 06bd2b1ed04ca9fdbc767859885944a1e8b86b40 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 27 Oct 2016 18:51:55 -0400 Subject: tipc: fix broadcast link synchronization problem In commit 2d18ac4ba745 ("tipc: extend broadcast link initialization criteria") we tried to fix a problem with the initial synchronization of broadcast link acknowledge values. Unfortunately that solution is not sufficient to solve the issue. We have seen it happen that LINK_PROTOCOL/STATE packets with a valid non-zero unicast acknowledge number may bypass BCAST_PROTOCOL initialization, NAME_DISTRIBUTOR and other STATE packets with invalid broadcast acknowledge numbers, leading to premature opening of the broadcast link. When the bypassed packets finally arrive, they are inadvertently accepted, and the already correctly initialized acknowledge number in the broadcast receive link is overwritten by the invalid (zero) value of the said packets. After this the broadcast link goes stale. We now fix this by marking the packets where we know the acknowledge value is or may be invalid, and then ignoring the acks from those. To this purpose, we claim an unused bit in the header to indicate that the value is invalid. We set the bit to 1 in the initial BCAST_PROTOCOL synchronization packet and all initial ("bulk") NAME_DISTRIBUTOR packets, plus those LINK_PROTOCOL packets sent out before the broadcast links are fully synchronized. This minor protocol update is fully backwards compatible. Reported-by: John Thompson Tested-by: John Thompson Signed-off-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 753f774..aa1babb 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -247,11 +247,17 @@ int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb) * * RCU is locked, no other locks set */ -void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked) +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr) { struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + u16 acked = msg_bcast_ack(hdr); struct sk_buff_head xmitq; + /* Ignore bc acks sent by peer before bcast synch point was received */ + if (msg_bc_ack_invalid(hdr)) + return; + __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); @@ -279,11 +285,11 @@ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); - if (msg_type(hdr) == STATE_MSG) { + if (msg_type(hdr) != STATE_MSG) { + tipc_link_bc_init_rcv(l, hdr); + } else if (!msg_bc_ack_invalid(hdr)) { tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq); rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq); - } else { - tipc_link_bc_init_rcv(l, hdr); } tipc_bcast_unlock(net); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 5ffe344..855d53c 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -55,7 +55,8 @@ void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id); int tipc_bcast_get_mtu(struct net *net); int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list); int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); -void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked); +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr); int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, struct tipc_msg *hdr); int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); diff --git a/net/tipc/link.c b/net/tipc/link.c index b36e16c..1055164 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1312,6 +1312,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_next_sent(hdr, l->snd_nxt); msg_set_ack(hdr, l->rcv_nxt - 1); msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1); + msg_set_bc_ack_invalid(hdr, !node_up); msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); msg_set_link_tolerance(hdr, tolerance); msg_set_linkprio(hdr, priority); @@ -1574,6 +1575,7 @@ static void tipc_link_build_bc_init_msg(struct tipc_link *l, __skb_queue_head_init(&list); if (!tipc_link_build_bc_proto_msg(l->bc_rcvlink, false, 0, &list)) return; + msg_set_bc_ack_invalid(buf_msg(skb_peek(&list)), true); tipc_link_xmit(l, &list, xmitq); } diff --git a/net/tipc/msg.h b/net/tipc/msg.h index c3832cd..50a7398 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -714,6 +714,23 @@ static inline void msg_set_peer_stopping(struct tipc_msg *m, u32 s) msg_set_bits(m, 5, 13, 0x1, s); } +static inline bool msg_bc_ack_invalid(struct tipc_msg *m) +{ + switch (msg_user(m)) { + case BCAST_PROTOCOL: + case NAME_DISTRIBUTOR: + case LINK_PROTOCOL: + return msg_bits(m, 5, 14, 0x1); + default: + return false; + } +} + +static inline void msg_set_bc_ack_invalid(struct tipc_msg *m, bool invalid) +{ + msg_set_bits(m, 5, 14, 0x1, invalid); +} + static inline char *msg_media_addr(struct tipc_msg *m) { return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index a04fe9b..c1cfd92 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -156,6 +156,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, pr_warn("Bulk publication failure\n"); return; } + msg_set_bc_ack_invalid(buf_msg(skb), true); item = (struct distr_item *)msg_data(buf_msg(skb)); } diff --git a/net/tipc/node.c b/net/tipc/node.c index 7ef14e2..9d2f4c2 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1535,7 +1535,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) if (unlikely(usr == LINK_PROTOCOL)) tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq); else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) - tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); + tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr); /* Receive packet directly if conditions permit */ tipc_node_read_lock(n); -- cgit v0.10.2 From 9fe1c98ac90023842ae7cd921badfa1029e45bd1 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Thu, 27 Oct 2016 16:01:03 -0700 Subject: enic: fix rq disable When MTU is changed from 9000 to 1500 while there is burst of inbound 9000 bytes packets, adaptor sometimes delivers 9000 bytes packets to 1500 bytes buffers. This causes memory corruption and sometimes crash. This is because of a race condition in adaptor between "RQ disable" clearing descriptor mini-cache and mini-cache valid bit being set by completion of descriptor fetch. This can result in stale RQ desc being cached and used when packets arrive. In this case, the stale descriptor have old MTU value. Solution is to write RQ->disable twice. The first write will stop any further desc fetches, allowing the second disable to clear the mini-cache valid bit without danger of a race. Also, the check for rq->running becoming 0 after writing rq->enable to 0 is not done properly. When incoming packets are flooding the interface, rq->running will pulse high for each dropped packet. Since the driver was waiting for 10us between each poll, it is possible to see rq->running = 1 1000 times in a row, even though it is not actually stuck running. This results in false failure of vnic_rq_disable(). Fix is to try more than 1000 time without delay between polls to ensure we do not miss when running goes low. In old adaptors rq->enable needs to be re-written to 0 when posted_index is reset in vnic_rq_clean() in order to keep rq->prefetch_index in sync. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.c b/drivers/net/ethernet/cisco/enic/vnic_rq.c index e572a52..36bc2c7 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_rq.c +++ b/drivers/net/ethernet/cisco/enic/vnic_rq.c @@ -169,19 +169,28 @@ int vnic_rq_disable(struct vnic_rq *rq) { unsigned int wait; struct vnic_dev *vdev = rq->vdev; + int i; - iowrite32(0, &rq->ctrl->enable); + /* Due to a race condition with clearing RQ "mini-cache" in hw, we need + * to disable the RQ twice to guarantee that stale descriptors are not + * used when this RQ is re-enabled. + */ + for (i = 0; i < 2; i++) { + iowrite32(0, &rq->ctrl->enable); - /* Wait for HW to ACK disable request */ - for (wait = 0; wait < 1000; wait++) { - if (!(ioread32(&rq->ctrl->running))) - return 0; - udelay(10); - } + /* Wait for HW to ACK disable request */ + for (wait = 20000; wait > 0; wait--) + if (!ioread32(&rq->ctrl->running)) + break; + if (!wait) { + vdev_neterr(vdev, "Failed to disable RQ[%d]\n", + rq->index); - vdev_neterr(vdev, "Failed to disable RQ[%d]\n", rq->index); + return -ETIMEDOUT; + } + } - return -ETIMEDOUT; + return 0; } void vnic_rq_clean(struct vnic_rq *rq, @@ -212,6 +221,11 @@ void vnic_rq_clean(struct vnic_rq *rq, [fetch_index % VNIC_RQ_BUF_BLK_ENTRIES(count)]; iowrite32(fetch_index, &rq->ctrl->posted_index); + /* Anytime we write fetch_index, we need to re-write 0 to rq->enable + * to re-sync internal VIC state. + */ + iowrite32(0, &rq->ctrl->enable); + vnic_dev_clear_desc_ring(&rq->ring); } -- cgit v0.10.2 From 3034783472f5353f71af44ed52ad9ee65f9f6d17 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 28 Oct 2016 12:40:20 +0300 Subject: net: phy: dp83848: add dp83822 PHY support This PHY has a compatible register set with DP83848x so add support for it. Acked-by: Andrew F. Davis Signed-off-by: Roger Quadros Signed-off-by: David S. Miller diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c index 03d54c4..800b39f 100644 --- a/drivers/net/phy/dp83848.c +++ b/drivers/net/phy/dp83848.c @@ -19,6 +19,7 @@ #define TI_DP83848C_PHY_ID 0x20005ca0 #define NS_DP83848C_PHY_ID 0x20005c90 #define TLK10X_PHY_ID 0x2000a210 +#define TI_DP83822_PHY_ID 0x2000a240 /* Registers */ #define DP83848_MICR 0x11 /* MII Interrupt Control Register */ @@ -77,6 +78,7 @@ static struct mdio_device_id __maybe_unused dp83848_tbl[] = { { TI_DP83848C_PHY_ID, 0xfffffff0 }, { NS_DP83848C_PHY_ID, 0xfffffff0 }, { TLK10X_PHY_ID, 0xfffffff0 }, + { TI_DP83822_PHY_ID, 0xfffffff0 }, { } }; MODULE_DEVICE_TABLE(mdio, dp83848_tbl); @@ -105,6 +107,7 @@ static struct phy_driver dp83848_driver[] = { DP83848_PHY_DRIVER(TI_DP83848C_PHY_ID, "TI DP83848C 10/100 Mbps PHY"), DP83848_PHY_DRIVER(NS_DP83848C_PHY_ID, "NS DP83848C 10/100 Mbps PHY"), DP83848_PHY_DRIVER(TLK10X_PHY_ID, "TI TLK10X 10/100 Mbps PHY"), + DP83848_PHY_DRIVER(TI_DP83822_PHY_ID, "TI DP83822 10/100 Mbps PHY"), }; module_phy_driver(dp83848_driver); -- cgit v0.10.2 From 087892d29b75c025086d99b29d385a3dac0169fc Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sat, 29 Oct 2016 17:04:35 +0300 Subject: qede: Fix out-of-bound fastpath memory access Driver allocates a shadow array for transmitted SKBs with X entries; That means valid indices are {0,...,X - 1}. [X == 8191] Problem is the driver also uses X as a mask for a producer/consumer in order to choose the right entry in the array which allows access to entry X which is out of bounds. To fix this, simply allocate X + 1 entries in the shadow array. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 444b271..7def29a 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2940,7 +2940,7 @@ static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq) txq->num_tx_buffers = edev->q_num_tx_buffers; /* Allocate the parallel driver ring for Tx buffers */ - size = sizeof(*txq->sw_tx_ring) * NUM_TX_BDS_MAX; + size = sizeof(*txq->sw_tx_ring) * TX_RING_SIZE; txq->sw_tx_ring = kzalloc(size, GFP_KERNEL); if (!txq->sw_tx_ring) { DP_NOTICE(edev, "Tx buffers ring allocation failed\n"); @@ -2951,7 +2951,7 @@ static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq) QED_CHAIN_USE_TO_CONSUME_PRODUCE, QED_CHAIN_MODE_PBL, QED_CHAIN_CNT_TYPE_U16, - NUM_TX_BDS_MAX, + TX_RING_SIZE, sizeof(*p_virt), &txq->tx_pbl); if (rc) goto err; -- cgit v0.10.2 From c6fcc4fc5f8b592600c7409e769ab68da0fb1eca Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Fri, 28 Oct 2016 09:59:15 -0700 Subject: vxlan: avoid using stale vxlan socket. When vxlan device is closed vxlan socket is freed. This operation can race with vxlan-xmit function which dereferences vxlan socket. Following patch uses RCU mechanism to avoid this situation. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c1639a3..f3c2fa3 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -943,17 +943,20 @@ static bool vxlan_snoop(struct net_device *dev, static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev) { struct vxlan_dev *vxlan; + struct vxlan_sock *sock4; + struct vxlan_sock *sock6 = NULL; unsigned short family = dev->default_dst.remote_ip.sa.sa_family; + sock4 = rtnl_dereference(dev->vn4_sock); + /* The vxlan_sock is only used by dev, leaving group has * no effect on other vxlan devices. */ - if (family == AF_INET && dev->vn4_sock && - atomic_read(&dev->vn4_sock->refcnt) == 1) + if (family == AF_INET && sock4 && atomic_read(&sock4->refcnt) == 1) return false; #if IS_ENABLED(CONFIG_IPV6) - if (family == AF_INET6 && dev->vn6_sock && - atomic_read(&dev->vn6_sock->refcnt) == 1) + sock6 = rtnl_dereference(dev->vn6_sock); + if (family == AF_INET6 && sock6 && atomic_read(&sock6->refcnt) == 1) return false; #endif @@ -961,10 +964,12 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev) if (!netif_running(vxlan->dev) || vxlan == dev) continue; - if (family == AF_INET && vxlan->vn4_sock != dev->vn4_sock) + if (family == AF_INET && + rtnl_dereference(vxlan->vn4_sock) != sock4) continue; #if IS_ENABLED(CONFIG_IPV6) - if (family == AF_INET6 && vxlan->vn6_sock != dev->vn6_sock) + if (family == AF_INET6 && + rtnl_dereference(vxlan->vn6_sock) != sock6) continue; #endif @@ -1005,22 +1010,25 @@ static bool __vxlan_sock_release_prep(struct vxlan_sock *vs) static void vxlan_sock_release(struct vxlan_dev *vxlan) { - bool ipv4 = __vxlan_sock_release_prep(vxlan->vn4_sock); + struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock); #if IS_ENABLED(CONFIG_IPV6) - bool ipv6 = __vxlan_sock_release_prep(vxlan->vn6_sock); + struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); + + rcu_assign_pointer(vxlan->vn6_sock, NULL); #endif + rcu_assign_pointer(vxlan->vn4_sock, NULL); synchronize_net(); - if (ipv4) { - udp_tunnel_sock_release(vxlan->vn4_sock->sock); - kfree(vxlan->vn4_sock); + if (__vxlan_sock_release_prep(sock4)) { + udp_tunnel_sock_release(sock4->sock); + kfree(sock4); } #if IS_ENABLED(CONFIG_IPV6) - if (ipv6) { - udp_tunnel_sock_release(vxlan->vn6_sock->sock); - kfree(vxlan->vn6_sock); + if (__vxlan_sock_release_prep(sock6)) { + udp_tunnel_sock_release(sock6->sock); + kfree(sock6); } #endif } @@ -1036,18 +1044,21 @@ static int vxlan_igmp_join(struct vxlan_dev *vxlan) int ret = -EINVAL; if (ip->sa.sa_family == AF_INET) { + struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock); struct ip_mreqn mreq = { .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr, .imr_ifindex = ifindex, }; - sk = vxlan->vn4_sock->sock->sk; + sk = sock4->sock->sk; lock_sock(sk); ret = ip_mc_join_group(sk, &mreq); release_sock(sk); #if IS_ENABLED(CONFIG_IPV6) } else { - sk = vxlan->vn6_sock->sock->sk; + struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); + + sk = sock6->sock->sk; lock_sock(sk); ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex, &ip->sin6.sin6_addr); @@ -1067,18 +1078,21 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan) int ret = -EINVAL; if (ip->sa.sa_family == AF_INET) { + struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock); struct ip_mreqn mreq = { .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr, .imr_ifindex = ifindex, }; - sk = vxlan->vn4_sock->sock->sk; + sk = sock4->sock->sk; lock_sock(sk); ret = ip_mc_leave_group(sk, &mreq); release_sock(sk); #if IS_ENABLED(CONFIG_IPV6) } else { - sk = vxlan->vn6_sock->sock->sk; + struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); + + sk = sock6->sock->sk; lock_sock(sk); ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex, &ip->sin6.sin6_addr); @@ -1828,11 +1842,15 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, struct dst_cache *dst_cache, const struct ip_tunnel_info *info) { + struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct dst_entry *ndst; struct flowi6 fl6; int err; + if (!sock6) + return ERR_PTR(-EIO); + if (tos && !info) use_cache = false; if (use_cache) { @@ -1850,7 +1868,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, fl6.flowi6_proto = IPPROTO_UDP; err = ipv6_stub->ipv6_dst_lookup(vxlan->net, - vxlan->vn6_sock->sock->sk, + sock6->sock->sk, &ndst, &fl6); if (err < 0) return ERR_PTR(err); @@ -1995,9 +2013,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } if (dst->sa.sa_family == AF_INET) { - if (!vxlan->vn4_sock) + struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); + + if (!sock4) goto drop; - sk = vxlan->vn4_sock->sock->sk; + sk = sock4->sock->sk; rt = vxlan_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, @@ -2050,12 +2070,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, src_port, dst_port, xnet, !udp_sum); #if IS_ENABLED(CONFIG_IPV6) } else { + struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); struct dst_entry *ndst; u32 rt6i_flags; - if (!vxlan->vn6_sock) + if (!sock6) goto drop; - sk = vxlan->vn6_sock->sock->sk; + sk = sock6->sock->sk; ndst = vxlan6_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, @@ -2415,9 +2436,10 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) dport = info->key.tp_dst ? : vxlan->cfg.dst_port; if (ip_tunnel_info_af(info) == AF_INET) { + struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); struct rtable *rt; - if (!vxlan->vn4_sock) + if (!sock4) return -EINVAL; rt = vxlan_get_route(vxlan, skb, 0, info->key.tos, info->key.u.ipv4.dst, @@ -2429,8 +2451,6 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) #if IS_ENABLED(CONFIG_IPV6) struct dst_entry *ndst; - if (!vxlan->vn6_sock) - return -EINVAL; ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos, info->key.label, &info->key.u.ipv6.dst, &info->key.u.ipv6.src, NULL, info); @@ -2740,10 +2760,10 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6) return PTR_ERR(vs); #if IS_ENABLED(CONFIG_IPV6) if (ipv6) - vxlan->vn6_sock = vs; + rcu_assign_pointer(vxlan->vn6_sock, vs); else #endif - vxlan->vn4_sock = vs; + rcu_assign_pointer(vxlan->vn4_sock, vs); vxlan_vs_add_dev(vs, vxlan); return 0; } @@ -2754,9 +2774,9 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan) bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA; int ret = 0; - vxlan->vn4_sock = NULL; + RCU_INIT_POINTER(vxlan->vn4_sock, NULL); #if IS_ENABLED(CONFIG_IPV6) - vxlan->vn6_sock = NULL; + RCU_INIT_POINTER(vxlan->vn6_sock, NULL); if (ipv6 || metadata) ret = __vxlan_sock_add(vxlan, true); #endif diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0255613..308adc4 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -225,9 +225,9 @@ struct vxlan_config { struct vxlan_dev { struct hlist_node hlist; /* vni hash table */ struct list_head next; /* vxlan's per namespace list */ - struct vxlan_sock *vn4_sock; /* listening socket for IPv4 */ + struct vxlan_sock __rcu *vn4_sock; /* listening socket for IPv4 */ #if IS_ENABLED(CONFIG_IPV6) - struct vxlan_sock *vn6_sock; /* listening socket for IPv6 */ + struct vxlan_sock __rcu *vn6_sock; /* listening socket for IPv6 */ #endif struct net_device *dev; struct net *net; /* netns for packet i/o */ -- cgit v0.10.2 From fceb9c3e38252992bbf1a3028cc2f7b871211533 Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Fri, 28 Oct 2016 09:59:16 -0700 Subject: geneve: avoid using stale geneve socket. This patch is similar to earlier vxlan patch. Geneve device close operation frees geneve socket. This operation can race with geneve-xmit function which dereferences geneve socket. Following patch uses RCU mechanism to avoid this situation. Signed-off-by: Pravin B Shelar Acked-by: John W. Linville Signed-off-by: David S. Miller diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 16af1ce..42edd7b 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -58,9 +58,9 @@ struct geneve_dev { struct hlist_node hlist; /* vni hash table */ struct net *net; /* netns for packet i/o */ struct net_device *dev; /* netdev for geneve tunnel */ - struct geneve_sock *sock4; /* IPv4 socket used for geneve tunnel */ + struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */ #if IS_ENABLED(CONFIG_IPV6) - struct geneve_sock *sock6; /* IPv6 socket used for geneve tunnel */ + struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */ #endif u8 vni[3]; /* virtual network ID for tunnel */ u8 ttl; /* TTL override */ @@ -543,9 +543,19 @@ static void __geneve_sock_release(struct geneve_sock *gs) static void geneve_sock_release(struct geneve_dev *geneve) { - __geneve_sock_release(geneve->sock4); + struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4); #if IS_ENABLED(CONFIG_IPV6) - __geneve_sock_release(geneve->sock6); + struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6); + + rcu_assign_pointer(geneve->sock6, NULL); +#endif + + rcu_assign_pointer(geneve->sock4, NULL); + synchronize_net(); + + __geneve_sock_release(gs4); +#if IS_ENABLED(CONFIG_IPV6) + __geneve_sock_release(gs6); #endif } @@ -586,10 +596,10 @@ out: gs->flags = geneve->flags; #if IS_ENABLED(CONFIG_IPV6) if (ipv6) - geneve->sock6 = gs; + rcu_assign_pointer(geneve->sock6, gs); else #endif - geneve->sock4 = gs; + rcu_assign_pointer(geneve->sock4, gs); hash = geneve_net_vni_hash(geneve->vni); hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]); @@ -603,9 +613,7 @@ static int geneve_open(struct net_device *dev) bool metadata = geneve->collect_md; int ret = 0; - geneve->sock4 = NULL; #if IS_ENABLED(CONFIG_IPV6) - geneve->sock6 = NULL; if (ipv6 || metadata) ret = geneve_sock_add(geneve, true); #endif @@ -720,6 +728,9 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, struct rtable *rt = NULL; __u8 tos; + if (!rcu_dereference(geneve->sock4)) + return ERR_PTR(-EIO); + memset(fl4, 0, sizeof(*fl4)); fl4->flowi4_mark = skb->mark; fl4->flowi4_proto = IPPROTO_UDP; @@ -772,11 +783,15 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, { bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct geneve_dev *geneve = netdev_priv(dev); - struct geneve_sock *gs6 = geneve->sock6; struct dst_entry *dst = NULL; struct dst_cache *dst_cache; + struct geneve_sock *gs6; __u8 prio; + gs6 = rcu_dereference(geneve->sock6); + if (!gs6) + return ERR_PTR(-EIO); + memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_mark = skb->mark; fl6->flowi6_proto = IPPROTO_UDP; @@ -842,7 +857,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct ip_tunnel_info *info) { struct geneve_dev *geneve = netdev_priv(dev); - struct geneve_sock *gs4 = geneve->sock4; + struct geneve_sock *gs4; struct rtable *rt = NULL; const struct iphdr *iip; /* interior IP header */ int err = -EINVAL; @@ -853,6 +868,10 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); u32 flags = geneve->flags; + gs4 = rcu_dereference(geneve->sock4); + if (!gs4) + goto tx_error; + if (geneve->collect_md) { if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { netdev_dbg(dev, "no tunnel metadata\n"); @@ -932,9 +951,9 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct ip_tunnel_info *info) { struct geneve_dev *geneve = netdev_priv(dev); - struct geneve_sock *gs6 = geneve->sock6; struct dst_entry *dst = NULL; const struct iphdr *iip; /* interior IP header */ + struct geneve_sock *gs6; int err = -EINVAL; struct flowi6 fl6; __u8 prio, ttl; @@ -943,6 +962,10 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); u32 flags = geneve->flags; + gs6 = rcu_dereference(geneve->sock6); + if (!gs6) + goto tx_error; + if (geneve->collect_md) { if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { netdev_dbg(dev, "no tunnel metadata\n"); -- cgit v0.10.2