From 862474f8b46f6c1e600d4934e40ba40646c696ec Mon Sep 17 00:00:00 2001 From: Olivier Sobrie Date: Tue, 11 Feb 2014 11:01:23 +0100 Subject: can: kvaser_usb: check number of channels returned by HW It is needed to check the number of channels returned by the HW because it cannot be greater than MAX_NET_DEVICES otherwise it will crash. Signed-off-by: Olivier Sobrie Cc: linux-stable Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 6c859bb..e77d110 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -473,6 +473,8 @@ static int kvaser_usb_get_card_info(struct kvaser_usb *dev) return err; dev->nchannels = msg.u.cardinfo.nchannels; + if (dev->nchannels > MAX_NET_DEVICES) + return -EINVAL; return 0; } -- cgit v0.10.2 From 205e2210daa975d92ace485a65a31ccc4077fe1a Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 12 Feb 2014 15:15:05 +0200 Subject: iwlwifi: disable TX AMPDU by default for iwldvm NICs supported by iwldvm don't handle well TX AMPDU. Disable it by default, still leave the possibility to the user to force enable it with a debug parameter. NICs supported by iwlmvm don't suffer from the same issue, leave TX AMPDU enabled by default for these. Signed-off-by: Emmanuel Grumbach diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index c24d1d3..73086c1 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -696,6 +696,24 @@ static int iwlagn_mac_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, return ret; } +static inline bool iwl_enable_rx_ampdu(const struct iwl_cfg *cfg) +{ + if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_RXAGG) + return false; + return true; +} + +static inline bool iwl_enable_tx_ampdu(const struct iwl_cfg *cfg) +{ + if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_TXAGG) + return false; + if (iwlwifi_mod_params.disable_11n & IWL_ENABLE_HT_TXAGG) + return true; + + /* disabled by default */ + return false; +} + static int iwlagn_mac_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, @@ -717,7 +735,7 @@ static int iwlagn_mac_ampdu_action(struct ieee80211_hw *hw, switch (action) { case IEEE80211_AMPDU_RX_START: - if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_RXAGG) + if (!iwl_enable_rx_ampdu(priv->cfg)) break; IWL_DEBUG_HT(priv, "start Rx\n"); ret = iwl_sta_rx_agg_start(priv, sta, tid, *ssn); @@ -729,7 +747,7 @@ static int iwlagn_mac_ampdu_action(struct ieee80211_hw *hw, case IEEE80211_AMPDU_TX_START: if (!priv->trans->ops->txq_enable) break; - if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_TXAGG) + if (!iwl_enable_tx_ampdu(priv->cfg)) break; IWL_DEBUG_HT(priv, "start Tx\n"); ret = iwlagn_tx_agg_start(priv, vif, sta, tid, ssn); diff --git a/drivers/net/wireless/iwlwifi/iwl-drv.c b/drivers/net/wireless/iwlwifi/iwl-drv.c index c372816..7510355 100644 --- a/drivers/net/wireless/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/iwlwifi/iwl-drv.c @@ -1286,7 +1286,7 @@ module_param_named(swcrypto, iwlwifi_mod_params.sw_crypto, int, S_IRUGO); MODULE_PARM_DESC(swcrypto, "using crypto in software (default 0 [hardware])"); module_param_named(11n_disable, iwlwifi_mod_params.disable_11n, uint, S_IRUGO); MODULE_PARM_DESC(11n_disable, - "disable 11n functionality, bitmap: 1: full, 2: agg TX, 4: agg RX"); + "disable 11n functionality, bitmap: 1: full, 2: disable agg TX, 4: disable agg RX, 8 enable agg TX"); module_param_named(amsdu_size_8K, iwlwifi_mod_params.amsdu_size_8K, int, S_IRUGO); MODULE_PARM_DESC(amsdu_size_8K, "enable 8K amsdu size (default 0)"); diff --git a/drivers/net/wireless/iwlwifi/iwl-modparams.h b/drivers/net/wireless/iwlwifi/iwl-modparams.h index 0a84ade..b29075c 100644 --- a/drivers/net/wireless/iwlwifi/iwl-modparams.h +++ b/drivers/net/wireless/iwlwifi/iwl-modparams.h @@ -79,9 +79,12 @@ enum iwl_power_level { IWL_POWER_NUM }; -#define IWL_DISABLE_HT_ALL BIT(0) -#define IWL_DISABLE_HT_TXAGG BIT(1) -#define IWL_DISABLE_HT_RXAGG BIT(2) +enum iwl_disable_11n { + IWL_DISABLE_HT_ALL = BIT(0), + IWL_DISABLE_HT_TXAGG = BIT(1), + IWL_DISABLE_HT_RXAGG = BIT(2), + IWL_ENABLE_HT_TXAGG = BIT(3), +}; /** * struct iwl_mod_params @@ -90,7 +93,7 @@ enum iwl_power_level { * * @sw_crypto: using hardware encryption, default = 0 * @disable_11n: disable 11n capabilities, default = 0, - * use IWL_DISABLE_HT_* constants + * use IWL_[DIS,EN]ABLE_HT_* constants * @amsdu_size_8K: enable 8K amsdu size, default = 0 * @restart_fw: restart firmware, default = 1 * @wd_disable: enable stuck queue check, default = 0 diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 6bf9766..c35b866 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -328,6 +328,24 @@ static void iwl_mvm_mac_tx(struct ieee80211_hw *hw, ieee80211_free_txskb(hw, skb); } +static inline bool iwl_enable_rx_ampdu(const struct iwl_cfg *cfg) +{ + if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_RXAGG) + return false; + return true; +} + +static inline bool iwl_enable_tx_ampdu(const struct iwl_cfg *cfg) +{ + if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_TXAGG) + return false; + if (iwlwifi_mod_params.disable_11n & IWL_ENABLE_HT_TXAGG) + return true; + + /* enabled by default */ + return true; +} + static int iwl_mvm_mac_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, @@ -347,7 +365,7 @@ static int iwl_mvm_mac_ampdu_action(struct ieee80211_hw *hw, switch (action) { case IEEE80211_AMPDU_RX_START: - if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_RXAGG) { + if (!iwl_enable_rx_ampdu(mvm->cfg)) { ret = -EINVAL; break; } @@ -357,7 +375,7 @@ static int iwl_mvm_mac_ampdu_action(struct ieee80211_hw *hw, ret = iwl_mvm_sta_rx_agg(mvm, sta, tid, 0, false); break; case IEEE80211_AMPDU_TX_START: - if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_TXAGG) { + if (!iwl_enable_tx_ampdu(mvm->cfg)) { ret = -EINVAL; break; } -- cgit v0.10.2 From f78bccd79ba3cd9d9664981b501d57bdb81ab8a4 Mon Sep 17 00:00:00 2001 From: Olivier Langlois Date: Sat, 1 Feb 2014 01:11:09 -0500 Subject: rtlwifi: rtl8192ce: Fix too long disable of IRQs rtl8192ce is disabling for too long the local interrupts during hw initiatialisation when performing scans The observable symptoms in dmesg can be: - underruns from ALSA playback - clock freezes (tstamps do not change for several dmesg entries until irqs are finaly reenabled): [ 250.817669] rtlwifi:rtl_op_config():<0-0-0> 0x100 [ 250.817685] rtl8192ce:_rtl92ce_phy_set_rf_power_state():<0-1-0> IPS Set eRf nic enable [ 250.817732] rtl8192ce:_rtl92ce_init_mac():<0-1-0> reg0xec:18051d59:11 [ 250.817796] rtl8192ce:_rtl92ce_init_mac():<0-1-0> reg0xec:18051d59:11 [ 250.817910] rtl8192ce:_rtl92ce_init_mac():<0-1-0> reg0xec:18051d59:11 [ 250.818024] rtl8192ce:_rtl92ce_init_mac():<0-1-0> reg0xec:18051d59:11 [ 250.818139] rtl8192ce:_rtl92ce_init_mac():<0-1-0> reg0xec:18051d59:11 [ 250.818253] rtl8192ce:_rtl92ce_init_mac():<0-1-0> reg0xec:18051d59:11 [ 250.818367] rtl8192ce:_rtl92ce_init_mac():<0-1-0> reg0xec:18051d59:11 [ 250.818472] rtl8192ce:_rtl92ce_init_mac():<0-1-0> reg0xec:18051d59:11 [ 250.818472] rtl8192ce:_rtl92ce_init_mac():<0-1-0> reg0xec:18051d59:11 [ 250.818472] rtl8192ce:_rtl92ce_init_mac():<0-1-0> reg0xec:18051d59:11 [ 250.818472] rtl8192ce:_rtl92ce_init_mac():<0-1-0> reg0xec:18051d59:11 [ 250.818472] rtl8192ce:_rtl92ce_init_mac():<0-1-0> reg0xec:98053f15:10 [ 250.818472] rtl8192ce:rtl92ce_sw_led_on():<0-1-0> LedAddr:4E ledpin=1 [ 250.818472] rtl8192c_common:rtl92c_download_fw():<0-1-0> Firmware Version(49), Signature(0x88c1),Size(32) [ 250.818472] rtl8192ce:rtl92ce_enable_hw_security_config():<0-1-0> PairwiseEncAlgorithm = 0 GroupEncAlgorithm = 0 [ 250.818472] rtl8192ce:rtl92ce_enable_hw_security_config():<0-1-0> The SECR-value cc [ 250.818472] rtl8192c_common:rtl92c_dm_check_txpower_tracking_thermal_meter():<0-1-0> Schedule TxPowerTracking direct call!! [ 250.818472] rtl8192c_common:rtl92c_dm_txpower_tracking_callback_thermalmeter():<0-1-0> rtl92c_dm_txpower_tracking_callback_thermalmeter [ 250.818472] rtl8192c_common:rtl92c_dm_txpower_tracking_callback_thermalmeter():<0-1-0> Readback Thermal Meter = 0xe pre thermal meter 0xf eeprom_thermalmeter 0xf [ 250.818472] rtl8192c_common:rtl92c_dm_txpower_tracking_callback_thermalmeter():<0-1-0> Initial pathA ele_d reg0xc80 = 0x40000000, ofdm_index=0xc [ 250.818472] rtl8192c_common:rtl92c_dm_txpower_tracking_callback_thermalmeter():<0-1-0> Initial reg0xa24 = 0x90e1317, cck_index=0xc, ch14 0 [ 250.818472] rtl8192c_common:rtl92c_dm_txpower_tracking_callback_thermalmeter():<0-1-0> Readback Thermal Meter = 0xe pre thermal meter 0xf eeprom_thermalmeter 0xf delta 0x1 delta_lck 0x0 delta_iqk 0x0 [ 250.818472] rtl8192c_common:rtl92c_dm_txpower_tracking_callback_thermalmeter():<0-1-0> <=== [ 250.818472] rtl8192c_common:rtl92c_dm_initialize_txpower_tracking_thermalmeter():<0-1-0> pMgntInfo->txpower_tracking = 1 [ 250.818472] rtl8192ce:rtl92ce_led_control():<0-1-0> ledaction 3 [ 250.818472] rtl8192ce:rtl92ce_sw_led_on():<0-1-0> LedAddr:4E ledpin=1 [ 250.818472] rtlwifi:rtl_ips_nic_on():<0-1-0> before spin_unlock_irqrestore [ 251.154656] PCM: Lost interrupts? [Q]-0 (stream=0, delta=15903, new_hw_ptr=293408, old_hw_ptr=277505) The exact code flow that causes that is: 1. wpa_supplicant send a start_scan request to the nl80211 driver 2. mac80211 module call rtl_op_config with IEEE80211_CONF_CHANGE_IDLE 3. rtl_ips_nic_on is called which disable local irqs 4. rtl92c_phy_set_rf_power_state() is called 5. rtl_ps_enable_nic() is called and hw_init()is executed and then the interrupts on the device are enabled A good solution could be to refactor the code to avoid calling rtl92ce_hw_init() with the irqs disabled but a quick and dirty solution that has proven to work is to reenable the irqs during the function rtl92ce_hw_init(). I think that it is safe doing so since the device interrupt will only be enabled after the init function succeed. Signed-off-by: Olivier Langlois Cc: Stable Acked-by: Larry Finger Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c index a82b30a..2eb0b38 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c @@ -937,14 +937,26 @@ int rtl92ce_hw_init(struct ieee80211_hw *hw) bool is92c; int err; u8 tmp_u1b; + unsigned long flags; rtlpci->being_init_adapter = true; + + /* Since this function can take a very long time (up to 350 ms) + * and can be called with irqs disabled, reenable the irqs + * to let the other devices continue being serviced. + * + * It is safe doing so since our own interrupts will only be enabled + * in a subsequent step. + */ + local_save_flags(flags); + local_irq_enable(); + rtlpriv->intf_ops->disable_aspm(hw); rtstatus = _rtl92ce_init_mac(hw); if (!rtstatus) { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "Init MAC failed\n"); err = 1; - return err; + goto exit; } err = rtl92c_download_fw(hw); @@ -952,7 +964,7 @@ int rtl92ce_hw_init(struct ieee80211_hw *hw) RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING, "Failed to download FW. Init HW without FW now..\n"); err = 1; - return err; + goto exit; } rtlhal->last_hmeboxnum = 0; @@ -1032,6 +1044,8 @@ int rtl92ce_hw_init(struct ieee80211_hw *hw) RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE, "under 1.5V\n"); } rtl92c_dm_init(hw); +exit: + local_irq_restore(flags); rtlpci->being_init_adapter = false; return err; } -- cgit v0.10.2 From 2e8c5e56b307271c2dab6f8bfd1d8a3822ca2390 Mon Sep 17 00:00:00 2001 From: Olivier Langlois Date: Sat, 1 Feb 2014 01:11:10 -0500 Subject: rtlwifi: Fix incorrect return from rtl_ps_enable_nic() rtl_ps_enable_nic() is called from loops that will loop until this function returns true or a maximum number of retries is performed. hw_init() returns non-zero on error. In that situation return false to restore the original design intent to retry hw init when it fails. Signed-off-by: Olivier Langlois Cc: Stable Acked-by: Larry Finger Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/rtlwifi/ps.c b/drivers/net/wireless/rtlwifi/ps.c index deedae3..d1c0191 100644 --- a/drivers/net/wireless/rtlwifi/ps.c +++ b/drivers/net/wireless/rtlwifi/ps.c @@ -48,7 +48,7 @@ bool rtl_ps_enable_nic(struct ieee80211_hw *hw) /*<2> Enable Adapter */ if (rtlpriv->cfg->ops->hw_init(hw)) - return 1; + return false; RT_CLEAR_PS_LEVEL(ppsc, RT_RF_OFF_LEVL_HALT_NIC); /*<3> Enable Interrupt */ -- cgit v0.10.2 From 4a0732d1f93ff52b0e74a61302260b9417f6db61 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 7 Feb 2014 14:50:38 +0300 Subject: ath5k: shifting the wrong variable for AR5K_AR5210 In the original code we shift "AR5K_PHY(256) >> 28" which is zero but the intent was to shift the return value of ath5k_hw_reg_read() like we do a couple lines later. Signed-off-by: Dan Carpenter Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath5k/phy.c b/drivers/net/wireless/ath/ath5k/phy.c index d6bc7cb..1a2973b 100644 --- a/drivers/net/wireless/ath/ath5k/phy.c +++ b/drivers/net/wireless/ath/ath5k/phy.c @@ -110,7 +110,7 @@ ath5k_hw_radio_revision(struct ath5k_hw *ah, enum ieee80211_band band) ath5k_hw_reg_write(ah, 0x00010000, AR5K_PHY(0x20)); if (ah->ah_version == AR5K_AR5210) { - srev = ath5k_hw_reg_read(ah, AR5K_PHY(256) >> 28) & 0xf; + srev = (ath5k_hw_reg_read(ah, AR5K_PHY(256)) >> 28) & 0xf; ret = (u16)ath5k_hw_bitswap(srev, 4) + 1; } else { srev = (ath5k_hw_reg_read(ah, AR5K_PHY(0x100)) >> 24) & 0xff; -- cgit v0.10.2 From b6213e413a4e0c66548153516b074df14f9d08e0 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 10 Feb 2014 22:38:28 +0100 Subject: rtl8187: fix regression on MIPS without coherent DMA This patch fixes regression caused by commit a16dad77634 "MIPS: Fix potencial corruption". That commit fixes one corruption scenario in cost of adding another one, which actually start to cause crashes on Yeeloong laptop when rtl8187 driver is used. For correct DMA read operation on machines without DMA coherence, kernel have to invalidate cache, such it will refill later with new data that device wrote to memory, when that data is needed to process. We can only invalidate full cache line. Hence when cache line includes both dma buffer and some other data (written in cache, but not yet in main memory), the other data can not hit memory due to invalidation. That happen on rtl8187 where struct rtl8187_priv fields are located just before and after small buffers that are passed to USB layer and DMA is performed on them. To fix the problem we align buffers and reserve space after them to make them match cache line. This patch does not resolve all possible MIPS problems entirely, for that we have to assure that we always map cache aligned buffers for DMA, what can be complex or even not possible. But patch fixes visible and reproducible regression and seems other possible corruptions do not happen in practice, since Yeeloong laptop works stable without rtl8187 driver. Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=54391 Reported-by: Petr Pisar Bisected-by: Tom Li Reported-and-tested-by: Tom Li Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Acked-by: Larry Finger Acked-by: Hin-Tak Leung Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/rtl818x/rtl8187/rtl8187.h b/drivers/net/wireless/rtl818x/rtl8187/rtl8187.h index 56aee06..a6ad79f 100644 --- a/drivers/net/wireless/rtl818x/rtl8187/rtl8187.h +++ b/drivers/net/wireless/rtl818x/rtl8187/rtl8187.h @@ -15,6 +15,8 @@ #ifndef RTL8187_H #define RTL8187_H +#include + #include "rtl818x.h" #include "leds.h" @@ -139,7 +141,10 @@ struct rtl8187_priv { u8 aifsn[4]; u8 rfkill_mask; struct { - __le64 buf; + union { + __le64 buf; + u8 dummy1[L1_CACHE_BYTES]; + } ____cacheline_aligned; struct sk_buff_head queue; } b_tx_status; /* This queue is used by both -b and non-b devices */ struct mutex io_mutex; @@ -147,7 +152,8 @@ struct rtl8187_priv { u8 bits8; __le16 bits16; __le32 bits32; - } *io_dmabuf; + u8 dummy2[L1_CACHE_BYTES]; + } *io_dmabuf ____cacheline_aligned; bool rfkill_off; u16 seqno; }; -- cgit v0.10.2 From 4885c8731a34eecf509822e089ce17bcd9bd4650 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Wed, 12 Feb 2014 22:38:17 +0000 Subject: hostap: fix "hostap: proc: Use remove_proc_subtree()" remove_proc_subtree() doesn't work here as local->ddev has already been removed, and NULLed out. Use proc_remove() instead. Signed-off-by: Russell King Tested-by: Russell King Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/hostap/hostap_proc.c b/drivers/net/wireless/hostap/hostap_proc.c index aa7ad3a..4e5c0f8 100644 --- a/drivers/net/wireless/hostap/hostap_proc.c +++ b/drivers/net/wireless/hostap/hostap_proc.c @@ -496,7 +496,7 @@ void hostap_init_proc(local_info_t *local) void hostap_remove_proc(local_info_t *local) { - remove_proc_subtree(local->ddev->name, hostap_proc); + proc_remove(local->proc); } -- cgit v0.10.2 From c7966b525fc6af6d18a509b33f938312f59b57f5 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Mon, 10 Feb 2014 21:40:51 +0100 Subject: net: fix macvtap type name in Kconfig The netlink kind (and iproute2 type option) is actually called 'macvtap', not 'macvlan'. Signed-off-by: Jan Luebbe Signed-off-by: David S. Miller diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index f342278..494b888 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -139,7 +139,7 @@ config MACVTAP This adds a specialized tap character device driver that is based on the MAC-VLAN network interface, called macvtap. A macvtap device can be added in the same way as a macvlan device, using 'type - macvlan', and then be accessed through the tap user space interface. + macvtap', and then be accessed through the tap user space interface. To compile this driver as a module, choose M here: the module will be called macvtap. -- cgit v0.10.2 From d15891ca1fdd7f1f46cede1dc15bcfbf0445a658 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 11 Feb 2014 09:59:57 +0000 Subject: net: stmmac:sti: Add STi SOC glue driver. STi series SOCs have a glue layer on top of the synopsis gmac IP, this glue layer needs to be configured before the gmac driver starts using the IP. This patch adds a support to this glue layer which is configured via stmmac setup, init, exit callbacks. Signed-off-by: Srinivas Kandagatla Signed-off-by: David S. Miller diff --git a/Documentation/devicetree/bindings/net/sti-dwmac.txt b/Documentation/devicetree/bindings/net/sti-dwmac.txt new file mode 100644 index 0000000..3dd3d0b --- /dev/null +++ b/Documentation/devicetree/bindings/net/sti-dwmac.txt @@ -0,0 +1,58 @@ +STMicroelectronics SoC DWMAC glue layer controller + +The device node has following properties. + +Required properties: + - compatible : Can be "st,stih415-dwmac", "st,stih416-dwmac" or + "st,stid127-dwmac". + - reg : Offset of the glue configuration register map in system + configuration regmap pointed by st,syscon property and size. + + - reg-names : Should be "sti-ethconf". + + - st,syscon : Should be phandle to system configuration node which + encompases this glue registers. + + - st,tx-retime-src: On STi Parts for Giga bit speeds, 125Mhz clocks can be + wired up in from different sources. One via TXCLK pin and other via CLK_125 + pin. This wiring is totally board dependent. However the retiming glue + logic should be configured accordingly. Possible values for this property + + "txclk" - if 125Mhz clock is wired up via txclk line. + "clk_125" - if 125Mhz clock is wired up via clk_125 line. + + This property is only valid for Giga bit setup( GMII, RGMII), and it is + un-used for non-giga bit (MII and RMII) setups. Also note that internal + clockgen can not generate stable 125Mhz clock. + + - st,ext-phyclk: This boolean property indicates who is generating the clock + for tx and rx. This property is only valid for RMII case where the clock can + be generated from the MAC or PHY. + + - clock-names: should be "sti-ethclk". + - clocks: Should point to ethernet clockgen which can generate phyclk. + + +Example: + +ethernet0: dwmac@fe810000 { + device_type = "network"; + compatible = "st,stih416-dwmac", "snps,dwmac", "snps,dwmac-3.710"; + reg = <0xfe810000 0x8000>, <0x8bc 0x4>; + reg-names = "stmmaceth", "sti-ethconf"; + interrupts = <0 133 0>, <0 134 0>, <0 135 0>; + interrupt-names = "macirq", "eth_wake_irq", "eth_lpi"; + phy-mode = "mii"; + + st,syscon = <&syscfg_rear>; + + snps,pbl = <32>; + snps,mixed-burst; + + resets = <&softreset STIH416_ETH0_SOFTRESET>; + reset-names = "stmmaceth"; + pinctrl-0 = <&pinctrl_mii0>; + pinctrl-names = "default"; + clocks = <&CLK_S_GMAC0_PHY>; + clock-names = "stmmaceth"; +}; diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index e2f202e..f2d7c70 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -37,6 +37,17 @@ config DWMAC_SUNXI stmmac device driver. This driver is used for A20/A31 GMAC ethernet controller. +config DWMAC_STI + bool "STi GMAC support" + depends on STMMAC_PLATFORM && ARCH_STI + default y + ---help--- + Support for ethernet controller on STi SOCs. + + This selects STi SoC glue layer support for the stmmac + device driver. This driver is used on for the STi series + SOCs GMAC ethernet controller. + config STMMAC_PCI bool "STMMAC PCI bus support" depends on STMMAC_ETH && PCI diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index ecadece..dcef287 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_STMMAC_ETH) += stmmac.o stmmac-$(CONFIG_STMMAC_PLATFORM) += stmmac_platform.o stmmac-$(CONFIG_STMMAC_PCI) += stmmac_pci.o stmmac-$(CONFIG_DWMAC_SUNXI) += dwmac-sunxi.o +stmmac-$(CONFIG_DWMAC_STI) += dwmac-sti.o stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \ chain_mode.o dwmac_lib.o dwmac1000_core.o dwmac1000_dma.o \ dwmac100_core.o dwmac100_dma.o enh_desc.o norm_desc.o \ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c new file mode 100644 index 0000000..552bbc1 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -0,0 +1,330 @@ +/** + * dwmac-sti.c - STMicroelectronics DWMAC Specific Glue layer + * + * Copyright (C) 2003-2014 STMicroelectronics (R&D) Limited + * Author: Srinivas Kandagatla + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * STi GMAC glue logic. + * -------------------- + * + * _ + * | \ + * --------|0 \ ETH_SEL_INTERNAL_NOTEXT_PHYCLK + * phyclk | |___________________________________________ + * | | | (phyclk-in) + * --------|1 / | + * int-clk |_ / | + * | _ + * | | \ + * |_______|1 \ ETH_SEL_TX_RETIME_CLK + * | |___________________________ + * | | (tx-retime-clk) + * _______|0 / + * | |_ / + * _ | + * | \ | + * --------|0 \ | + * clk_125 | |__| + * | | ETH_SEL_TXCLK_NOT_CLK125 + * --------|1 / + * txclk |_ / + * + * + * ETH_SEL_INTERNAL_NOTEXT_PHYCLK is valid only for RMII where PHY can + * generate 50MHz clock or MAC can generate it. + * This bit is configured by "st,ext-phyclk" property. + * + * ETH_SEL_TXCLK_NOT_CLK125 is only valid for gigabit modes, where the 125Mhz + * clock either comes from clk-125 pin or txclk pin. This configuration is + * totally driven by the board wiring. This bit is configured by + * "st,tx-retime-src" property. + * + * TXCLK configuration is different for different phy interface modes + * and changes according to link speed in modes like RGMII. + * + * Below table summarizes the clock requirement and clock sources for + * supported phy interface modes with link speeds. + * ________________________________________________ + *| PHY_MODE | 1000 Mbit Link | 100 Mbit Link | + * ------------------------------------------------ + *| MII | n/a | 25Mhz | + *| | | txclk | + * ------------------------------------------------ + *| GMII | 125Mhz | 25Mhz | + *| | clk-125/txclk | txclk | + * ------------------------------------------------ + *| RGMII | 125Mhz | 25Mhz | + *| | clk-125/txclk | clkgen | + * ------------------------------------------------ + *| RMII | n/a | 25Mhz | + *| | |clkgen/phyclk-in | + * ------------------------------------------------ + * + * TX lines are always retimed with a clk, which can vary depending + * on the board configuration. Below is the table of these bits + * in eth configuration register depending on source of retime clk. + * + *--------------------------------------------------------------- + * src | tx_rt_clk | int_not_ext_phyclk | txclk_n_clk125| + *--------------------------------------------------------------- + * txclk | 0 | n/a | 1 | + *--------------------------------------------------------------- + * ck_125| 0 | n/a | 0 | + *--------------------------------------------------------------- + * phyclk| 1 | 0 | n/a | + *--------------------------------------------------------------- + * clkgen| 1 | 1 | n/a | + *--------------------------------------------------------------- + */ + + /* Register definition */ + + /* 3 bits [8:6] + * [6:6] ETH_SEL_TXCLK_NOT_CLK125 + * [7:7] ETH_SEL_INTERNAL_NOTEXT_PHYCLK + * [8:8] ETH_SEL_TX_RETIME_CLK + * + */ + +#define TX_RETIME_SRC_MASK GENMASK(8, 6) +#define ETH_SEL_TX_RETIME_CLK BIT(8) +#define ETH_SEL_INTERNAL_NOTEXT_PHYCLK BIT(7) +#define ETH_SEL_TXCLK_NOT_CLK125 BIT(6) + +#define ENMII_MASK GENMASK(5, 5) +#define ENMII BIT(5) + +/** + * 3 bits [4:2] + * 000-GMII/MII + * 001-RGMII + * 010-SGMII + * 100-RMII +*/ +#define MII_PHY_SEL_MASK GENMASK(4, 2) +#define ETH_PHY_SEL_RMII BIT(4) +#define ETH_PHY_SEL_SGMII BIT(3) +#define ETH_PHY_SEL_RGMII BIT(2) +#define ETH_PHY_SEL_GMII 0x0 +#define ETH_PHY_SEL_MII 0x0 + +#define IS_PHY_IF_MODE_RGMII(iface) (iface == PHY_INTERFACE_MODE_RGMII || \ + iface == PHY_INTERFACE_MODE_RGMII_ID || \ + iface == PHY_INTERFACE_MODE_RGMII_RXID || \ + iface == PHY_INTERFACE_MODE_RGMII_TXID) + +#define IS_PHY_IF_MODE_GBIT(iface) (IS_PHY_IF_MODE_RGMII(iface) || \ + iface == PHY_INTERFACE_MODE_GMII) + +struct sti_dwmac { + int interface; + bool ext_phyclk; + bool is_tx_retime_src_clk_125; + struct clk *clk; + int reg; + struct device *dev; + struct regmap *regmap; +}; + +static u32 phy_intf_sels[] = { + [PHY_INTERFACE_MODE_MII] = ETH_PHY_SEL_MII, + [PHY_INTERFACE_MODE_GMII] = ETH_PHY_SEL_GMII, + [PHY_INTERFACE_MODE_RGMII] = ETH_PHY_SEL_RGMII, + [PHY_INTERFACE_MODE_RGMII_ID] = ETH_PHY_SEL_RGMII, + [PHY_INTERFACE_MODE_SGMII] = ETH_PHY_SEL_SGMII, + [PHY_INTERFACE_MODE_RMII] = ETH_PHY_SEL_RMII, +}; + +enum { + TX_RETIME_SRC_NA = 0, + TX_RETIME_SRC_TXCLK = 1, + TX_RETIME_SRC_CLK_125, + TX_RETIME_SRC_PHYCLK, + TX_RETIME_SRC_CLKGEN, +}; + +static const char *const tx_retime_srcs[] = { + [TX_RETIME_SRC_NA] = "", + [TX_RETIME_SRC_TXCLK] = "txclk", + [TX_RETIME_SRC_CLK_125] = "clk_125", + [TX_RETIME_SRC_PHYCLK] = "phyclk", + [TX_RETIME_SRC_CLKGEN] = "clkgen", +}; + +static u32 tx_retime_val[] = { + [TX_RETIME_SRC_TXCLK] = ETH_SEL_TXCLK_NOT_CLK125, + [TX_RETIME_SRC_CLK_125] = 0x0, + [TX_RETIME_SRC_PHYCLK] = ETH_SEL_TX_RETIME_CLK, + [TX_RETIME_SRC_CLKGEN] = ETH_SEL_TX_RETIME_CLK | + ETH_SEL_INTERNAL_NOTEXT_PHYCLK, +}; + +static void setup_retime_src(struct sti_dwmac *dwmac, u32 spd) +{ + u32 src = 0, freq = 0; + + if (spd == SPEED_100) { + if (dwmac->interface == PHY_INTERFACE_MODE_MII || + dwmac->interface == PHY_INTERFACE_MODE_GMII) { + src = TX_RETIME_SRC_TXCLK; + } else if (dwmac->interface == PHY_INTERFACE_MODE_RMII) { + if (dwmac->ext_phyclk) { + src = TX_RETIME_SRC_PHYCLK; + } else { + src = TX_RETIME_SRC_CLKGEN; + freq = 50000000; + } + + } else if (IS_PHY_IF_MODE_RGMII(dwmac->interface)) { + src = TX_RETIME_SRC_CLKGEN; + freq = 25000000; + } + + if (src == TX_RETIME_SRC_CLKGEN && dwmac->clk) + clk_set_rate(dwmac->clk, freq); + + } else if (spd == SPEED_1000) { + if (dwmac->is_tx_retime_src_clk_125) + src = TX_RETIME_SRC_CLK_125; + else + src = TX_RETIME_SRC_TXCLK; + } + + regmap_update_bits(dwmac->regmap, dwmac->reg, + TX_RETIME_SRC_MASK, tx_retime_val[src]); +} + +static void sti_dwmac_exit(struct platform_device *pdev, void *priv) +{ + struct sti_dwmac *dwmac = priv; + + if (dwmac->clk) + clk_disable_unprepare(dwmac->clk); +} + +static void sti_fix_mac_speed(void *priv, unsigned int spd) +{ + struct sti_dwmac *dwmac = priv; + + setup_retime_src(dwmac, spd); + + return; +} + +static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, + struct platform_device *pdev) +{ + struct resource *res; + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct regmap *regmap; + int err; + + if (!np) + return -EINVAL; + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sti-ethconf"); + if (!res) + return -ENODATA; + + regmap = syscon_regmap_lookup_by_phandle(np, "st,syscon"); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + dwmac->dev = dev; + dwmac->interface = of_get_phy_mode(np); + dwmac->regmap = regmap; + dwmac->reg = res->start; + dwmac->ext_phyclk = of_property_read_bool(np, "st,ext-phyclk"); + dwmac->is_tx_retime_src_clk_125 = false; + + if (IS_PHY_IF_MODE_GBIT(dwmac->interface)) { + const char *rs; + + err = of_property_read_string(np, "st,tx-retime-src", &rs); + if (err < 0) { + dev_err(dev, "st,tx-retime-src not specified\n"); + return err; + } + + if (!strcasecmp(rs, "clk_125")) + dwmac->is_tx_retime_src_clk_125 = true; + } + + dwmac->clk = devm_clk_get(dev, "sti-ethclk"); + + if (IS_ERR(dwmac->clk)) + dwmac->clk = NULL; + + return 0; +} + +static int sti_dwmac_init(struct platform_device *pdev, void *priv) +{ + struct sti_dwmac *dwmac = priv; + struct regmap *regmap = dwmac->regmap; + int iface = dwmac->interface; + u32 reg = dwmac->reg; + u32 val, spd; + + if (dwmac->clk) + clk_prepare_enable(dwmac->clk); + + regmap_update_bits(regmap, reg, MII_PHY_SEL_MASK, phy_intf_sels[iface]); + + val = (iface == PHY_INTERFACE_MODE_REVMII) ? 0 : ENMII; + regmap_update_bits(regmap, reg, ENMII_MASK, val); + + if (IS_PHY_IF_MODE_GBIT(iface)) + spd = SPEED_1000; + else + spd = SPEED_100; + + setup_retime_src(dwmac, spd); + + return 0; +} + +static void *sti_dwmac_setup(struct platform_device *pdev) +{ + struct sti_dwmac *dwmac; + int ret; + + dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); + if (!dwmac) + return ERR_PTR(-ENOMEM); + + ret = sti_dwmac_parse_data(dwmac, pdev); + if (ret) { + dev_err(&pdev->dev, "Unable to parse OF data\n"); + return ERR_PTR(ret); + } + + return dwmac; +} + +const struct stmmac_of_data sti_gmac_data = { + .fix_mac_speed = sti_fix_mac_speed, + .setup = sti_dwmac_setup, + .init = sti_dwmac_init, + .exit = sti_dwmac_exit, +}; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index d9af26e..f9e60d7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -133,6 +133,9 @@ bool stmmac_eee_init(struct stmmac_priv *priv); #ifdef CONFIG_DWMAC_SUNXI extern const struct stmmac_of_data sun7i_gmac_data; #endif +#ifdef CONFIG_DWMAC_STI +extern const struct stmmac_of_data sti_gmac_data; +#endif extern struct platform_driver stmmac_pltfr_driver; static inline int stmmac_register_platform(void) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 5884a7d..c61bc72b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -33,6 +33,11 @@ static const struct of_device_id stmmac_dt_ids[] = { #ifdef CONFIG_DWMAC_SUNXI { .compatible = "allwinner,sun7i-a20-gmac", .data = &sun7i_gmac_data}, #endif +#ifdef CONFIG_DWMAC_STI + { .compatible = "st,stih415-dwmac", .data = &sti_gmac_data}, + { .compatible = "st,stih416-dwmac", .data = &sti_gmac_data}, + { .compatible = "st,stih127-dwmac", .data = &sti_gmac_data}, +#endif /* SoC specific glue layers should come before generic bindings */ { .compatible = "st,spear600-gmac"}, { .compatible = "snps,dwmac-3.610"}, -- cgit v0.10.2 From 64380a04deeed4720de76b086a3a4eab8dd41671 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Tue, 11 Feb 2014 11:38:26 +0100 Subject: tipc: fix message corruption bug for deferred packets If a packet received on a link is out-of-sequence, it will be placed on a deferred queue and later reinserted in the receive path once the preceding packets have been processed. The problem with this is that it will be subject to the buffer adjustment from link_recv_buf_validate twice. The second adjustment for 20 bytes header space will corrupt the packet. We solve this by tagging the deferred packets and bail out from receive buffer validation for packets that have already been subjected to this. Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/core.h b/net/tipc/core.h index 1ff477b..5569d96 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -192,6 +192,7 @@ static inline void k_term_timer(struct timer_list *timer) struct tipc_skb_cb { void *handle; + bool deferred; }; #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) diff --git a/net/tipc/link.c b/net/tipc/link.c index d4b5de4..da6018b 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1391,6 +1391,12 @@ static int link_recv_buf_validate(struct sk_buff *buf) u32 hdr_size; u32 min_hdr_size; + /* If this packet comes from the defer queue, the skb has already + * been validated + */ + if (unlikely(TIPC_SKB_CB(buf)->deferred)) + return 1; + if (unlikely(buf->len < MIN_H_SIZE)) return 0; @@ -1703,6 +1709,7 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, &l_ptr->newest_deferred_in, buf)) { l_ptr->deferred_inqueue_sz++; l_ptr->stats.deferred_recv++; + TIPC_SKB_CB(buf)->deferred = true; if ((l_ptr->deferred_inqueue_sz % 16) == 1) tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); } else -- cgit v0.10.2 From da37705cef30841616ed644ff33455bbc7374db0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 11 Feb 2014 15:51:29 -0800 Subject: macvlan: unregister net device when netdev_upper_dev_link() fails rtnl_newlink() doesn't unregister it for us on failure. Cc: Patrick McHardy Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. Miller diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 8433de4..a5d2189 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -879,14 +879,15 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, dev->priv_flags |= IFF_MACVLAN; err = netdev_upper_dev_link(lowerdev, dev); if (err) - goto destroy_port; - + goto unregister_netdev; list_add_tail_rcu(&vlan->list, &port->vlans); netif_stacked_transfer_operstate(lowerdev, dev); return 0; +unregister_netdev: + unregister_netdevice(dev); destroy_port: port->count -= 1; if (!port->count) -- cgit v0.10.2 From 0e0eee2465df77bcec2e8ff75432b8e57897b143 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 11 Feb 2014 15:51:30 -0800 Subject: net: correct error path in rtnl_newlink() I saw the following BUG when ->newlink() fails in rtnl_newlink(): [ 40.240058] kernel BUG at net/core/dev.c:6438! this is due to free_netdev() is not supposed to be called before netdev is completely unregistered, therefore it is not correct to call free_netdev() here, at least for ops->newlink!=NULL case, many drivers call it in ->destructor so that rtnl_unlock() will take care of it, we probably don't need to do anything here. Cc: David S. Miller Cc: Eric Dumazet Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. Miller diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 048dc8d..1a0dac2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1963,16 +1963,21 @@ replay: dev->ifindex = ifm->ifi_index; - if (ops->newlink) + if (ops->newlink) { err = ops->newlink(net, dev, tb, data); - else + /* Drivers should call free_netdev() in ->destructor + * and unregister it on failure so that device could be + * finally freed in rtnl_unlock. + */ + if (err < 0) + goto out; + } else { err = register_netdevice(dev); - - if (err < 0) { - free_netdev(dev); - goto out; + if (err < 0) { + free_netdev(dev); + goto out; + } } - err = rtnl_configure_link(dev, ifm); if (err < 0) unregister_netdevice(dev); -- cgit v0.10.2 From 22a1f5140ed69baa5906ce9b2b9143478f5a4da0 Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 12 Feb 2014 09:44:43 +0800 Subject: sctp: fix a missed .data initialization As commit 3c68198e75111a90("sctp: Make hmac algorithm selection for cookie generation dynamic"), we miss the .data initialization. If we don't use the net_namespace, the problem that parts of the sysctl configuration won't be isolation and won't occur. In sctp_sysctl_net_register(), we register the sysctl for each net, in the for(), we use the 'table[i].data' as check condition, so when the 'i' is the index of sctp_hmac_alg, the data is NULL, then break. So add the .data initialization. Acked-by: Neil Horman Signed-off-by: Wang Weidong Signed-off-by: David S. Miller diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 7135e61..d354de5 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -151,6 +151,7 @@ static struct ctl_table sctp_net_table[] = { }, { .procname = "cookie_hmac_alg", + .data = &init_net.sctp.sctp_hmac_alg, .maxlen = 8, .mode = 0644, .proc_handler = proc_sctp_do_hmac_alg, -- cgit v0.10.2 From efb842c45e667332774196bd5a1d539d048159cc Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 12 Feb 2014 09:44:44 +0800 Subject: sctp: optimize the sctp_sysctl_net_register Here, when the net is init_net, we needn't to kmemdup the ctl_table again. So add a check for net. Also we can save some memory. Signed-off-by: Wang Weidong Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index d354de5..35c8923 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -402,15 +402,18 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, int sctp_sysctl_net_register(struct net *net) { - struct ctl_table *table; - int i; + struct ctl_table *table = sctp_net_table; + + if (!net_eq(net, &init_net)) { + int i; - table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); - if (!table) - return -ENOMEM; + table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); + if (!table) + return -ENOMEM; - for (i = 0; table[i].data; i++) - table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; + for (i = 0; table[i].data; i++) + table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; + } net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table); return 0; -- cgit v0.10.2 From 455016e5dc623cb784dbaa4f197054ab87d84c76 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 11 Feb 2014 20:52:00 -0500 Subject: Documentation/networking: delete orphaned 3c505.txt file. In the commit 0e245dbaac9fa1c2fd0f4e2af7b9f6d874083a8b ("drivers/net: delete the 3Com 3c505/3c507 intel i825xx support") we clobbered the 3c505 driver (over a year ago) along with other abandoned ISA drivers. However, this orphaned README file escaped detection at that time, and has lived on until today. Get rid of it now. Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller diff --git a/Documentation/networking/3c505.txt b/Documentation/networking/3c505.txt deleted file mode 100644 index 72f38b1..0000000 --- a/Documentation/networking/3c505.txt +++ /dev/null @@ -1,45 +0,0 @@ -The 3Com Etherlink Plus (3c505) driver. - -This driver now uses DMA. There is currently no support for PIO operation. -The default DMA channel is 6; this is _not_ autoprobed, so you must -make sure you configure it correctly. If loading the driver as a -module, you can do this with "modprobe 3c505 dma=n". If the driver is -linked statically into the kernel, you must either use an "ether=" -statement on the command line, or change the definition of ELP_DMA in 3c505.h. - -The driver will warn you if it has to fall back on the compiled in -default DMA channel. - -If no base address is given at boot time, the driver will autoprobe -ports 0x300, 0x280 and 0x310 (in that order). If no IRQ is given, the driver -will try to probe for it. - -The driver can be used as a loadable module. - -Theoretically, one instance of the driver can now run multiple cards, -in the standard way (when loading a module, say "modprobe 3c505 -io=0x300,0x340 irq=10,11 dma=6,7" or whatever). I have not tested -this, though. - -The driver may now support revision 2 hardware; the dependency on -being able to read the host control register has been removed. This -is also untested, since I don't have a suitable card. - -Known problems: - I still see "DMA upload timed out" messages from time to time. These -seem to be fairly non-fatal though. - The card is old and slow. - -To do: - Improve probe/setup code - Test multicast and promiscuous operation - -Authors: - The driver is mainly written by Craig Southeren, email - . - Parts of the driver (adapting the driver to 1.1.4+ kernels, - IRQ/address detection, some changes) and this README by - Juha Laiho . - DMA mode, more fixes, etc, by Philip Blundell - Multicard support, Software configurable DMA, etc., by - Christopher Collins -- cgit v0.10.2 From f80889a5b79cae0b84465a90c21b1273a03b7973 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Wed, 12 Feb 2014 12:06:40 +0800 Subject: bonding: Fix deadlock in bonding driver when using netpoll The bonding driver take write locks and spin locks that are shared by the tx path in enslave processing and notification processing, If the netconsole is in use, the bonding can call printk which puts us in the netpoll tx path, if the netconsole is attached to the bonding driver, result in deadlock. So add protection for these place, by checking the netpoll_block_tx state, we can defer the sending of the netconsole frames until a later time using the retransmit feature of netpoll_send_skb that is triggered on the return code NETDEV_TX_BUSY. Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 71ba18e..8676649 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1543,9 +1543,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) bond_set_carrier(bond); if (USES_PRIMARY(bond->params.mode)) { + block_netpoll_tx(); write_lock_bh(&bond->curr_slave_lock); bond_select_active_slave(bond); write_unlock_bh(&bond->curr_slave_lock); + unblock_netpoll_tx(); } pr_info("%s: enslaving %s as a%s interface with a%s link.\n", @@ -1571,10 +1573,12 @@ err_detach: if (bond->primary_slave == new_slave) bond->primary_slave = NULL; if (bond->curr_active_slave == new_slave) { + block_netpoll_tx(); write_lock_bh(&bond->curr_slave_lock); bond_change_active_slave(bond, NULL); bond_select_active_slave(bond); write_unlock_bh(&bond->curr_slave_lock); + unblock_netpoll_tx(); } slave_disable_netpoll(new_slave); @@ -2864,9 +2868,12 @@ static int bond_slave_netdev_event(unsigned long event, pr_info("%s: Primary slave changed to %s, reselecting active slave.\n", bond->dev->name, bond->primary_slave ? slave_dev->name : "none"); + + block_netpoll_tx(); write_lock_bh(&bond->curr_slave_lock); bond_select_active_slave(bond); write_unlock_bh(&bond->curr_slave_lock); + unblock_netpoll_tx(); break; case NETDEV_FEAT_CHANGE: bond_compute_features(bond); -- cgit v0.10.2 From d206940319c41df4299db75ed56142177bb2e5f6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 13 Feb 2014 23:09:11 +0100 Subject: net: core: introduce netif_skb_dev_features Will be used by upcoming ipv4 forward path change that needs to determine feature mask using skb->dst->dev instead of skb->dev. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 440a02e..21d4e6b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3068,7 +3068,12 @@ void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); -netdev_features_t netif_skb_features(struct sk_buff *skb); +netdev_features_t netif_skb_dev_features(struct sk_buff *skb, + const struct net_device *dev); +static inline netdev_features_t netif_skb_features(struct sk_buff *skb) +{ + return netif_skb_dev_features(skb, skb->dev); +} static inline bool net_gso_ok(netdev_features_t features, int gso_type) { diff --git a/net/core/dev.c b/net/core/dev.c index 4ad1b78..b1b0c8d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2420,7 +2420,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault); * 2. No high memory really exists on this machine. */ -static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) +static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_HIGHMEM int i; @@ -2495,34 +2495,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) } static netdev_features_t harmonize_features(struct sk_buff *skb, - netdev_features_t features) + const struct net_device *dev, + netdev_features_t features) { if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, skb_network_protocol(skb))) { features &= ~NETIF_F_ALL_CSUM; - } else if (illegal_highdma(skb->dev, skb)) { + } else if (illegal_highdma(dev, skb)) { features &= ~NETIF_F_SG; } return features; } -netdev_features_t netif_skb_features(struct sk_buff *skb) +netdev_features_t netif_skb_dev_features(struct sk_buff *skb, + const struct net_device *dev) { __be16 protocol = skb->protocol; - netdev_features_t features = skb->dev->features; + netdev_features_t features = dev->features; - if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) + if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs) features &= ~NETIF_F_GSO_MASK; if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, features); + return harmonize_features(skb, dev, features); } - features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | + features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) @@ -2530,9 +2532,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - return harmonize_features(skb, features); + return harmonize_features(skb, dev, features); } -EXPORT_SYMBOL(netif_skb_features); +EXPORT_SYMBOL(netif_skb_dev_features); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) -- cgit v0.10.2 From fe6cc55f3a9a053482a76f5a6b2257cee51b4663 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 13 Feb 2014 23:09:12 +0100 Subject: net: ip, ipv6: handle gso skbs in forwarding path Marcelo Ricardo Leitner reported problems when the forwarding link path has a lower mtu than the incoming one if the inbound interface supports GRO. Given: Host R1 R2 Host sends tcp stream which is routed via R1 and R2. R1 performs GRO. In this case, the kernel will fail to send ICMP fragmentation needed messages (or pkt too big for ipv6), as GSO packets currently bypass dstmtu checks in forward path. Instead, Linux tries to send out packets exceeding the mtu. When locking route MTU on Host (i.e., no ipv4 DF bit set), R1 does not fragment the packets when forwarding, and again tries to send out packets exceeding R1-R2 link mtu. This alters the forwarding dstmtu checks to take the individual gso segment lengths into account. For ipv6, we send out pkt too big error for gso if the individual segments are too big. For ipv4, we either send icmp fragmentation needed, or, if the DF bit is not set, perform software segmentation and let the output path create fragments when the packet is leaving the machine. It is not 100% correct as the error message will contain the headers of the GRO skb instead of the original/segmented one, but it seems to work fine in my (limited) tests. Eric Dumazet suggested to simply shrink mss via ->gso_size to avoid sofware segmentation. However it turns out that skb_segment() assumes skb nr_frags is related to mss size so we would BUG there. I don't want to mess with it considering Herbert and Eric disagree on what the correct behavior should be. Hannes Frederic Sowa notes that when we would shrink gso_size skb_segment would then also need to deal with the case where SKB_MAX_FRAGS would be exceeded. This uses sofware segmentation in the forward path when we hit ipv4 non-DF packets and the outgoing link mtu is too small. Its not perfect, but given the lack of bug reports wrt. GRO fwd being broken this is a rare case anyway. Also its not like this could not be improved later once the dust settles. Acked-by: Herbert Xu Reported-by: Marcelo Ricardo Leitner Signed-off-by: Florian Westphal Signed-off-by: David S. Miller diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f589c9af..3ebbbe7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2916,5 +2916,22 @@ static inline bool skb_head_is_locked(const struct sk_buff *skb) { return !skb->head_frag || skb_cloned(skb); } + +/** + * skb_gso_network_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_network_seglen is used to determine the real size of the + * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP). + * + * The MAC/L2 header is not accounted for. + */ +static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - + skb_network_header(skb); + return hdr_len + skb_gso_transport_seglen(skb); +} #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index e9f1217..f3869c1 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -39,6 +39,71 @@ #include #include +static bool ip_may_fragment(const struct sk_buff *skb) +{ + return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || + !skb->local_df; +} + +static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu || skb->local_df) + return false; + + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) + return false; + + return true; +} + +static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb) +{ + unsigned int mtu; + + if (skb->local_df || !skb_is_gso(skb)) + return false; + + mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true); + + /* if seglen > mtu, do software segmentation for IP fragmentation on + * output. DF bit cannot be set since ip_forward would have sent + * icmp error. + */ + return skb_gso_network_seglen(skb) > mtu; +} + +/* called if GSO skb needs to be fragmented on forward */ +static int ip_forward_finish_gso(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + netdev_features_t features; + struct sk_buff *segs; + int ret = 0; + + features = netif_skb_dev_features(skb, dst->dev); + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + if (IS_ERR(segs)) { + kfree_skb(skb); + return -ENOMEM; + } + + consume_skb(skb); + + do { + struct sk_buff *nskb = segs->next; + int err; + + segs->next = NULL; + err = dst_output(segs); + + if (err && ret == 0) + ret = err; + segs = nskb; + } while (segs); + + return ret; +} + static int ip_forward_finish(struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); @@ -49,6 +114,9 @@ static int ip_forward_finish(struct sk_buff *skb) if (unlikely(opt->optlen)) ip_forward_options(skb); + if (ip_gso_exceeds_dst_mtu(skb)) + return ip_forward_finish_gso(skb); + return dst_output(skb); } @@ -91,8 +159,7 @@ int ip_forward(struct sk_buff *skb) IPCB(skb)->flags |= IPSKB_FORWARDED; mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); - if (unlikely(skb->len > mtu && !skb_is_gso(skb) && - (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { + if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, mtu)) { IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ef02b26..070a2fa 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -342,6 +342,20 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) return mtu; } +static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu || skb->local_df) + return false; + + if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) + return true; + + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) + return false; + + return true; +} + int ip6_forward(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -466,8 +480,7 @@ int ip6_forward(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) || - (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { + if (ip6_pkt_too_big(skb, mtu)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); -- cgit v0.10.2 From 9d5e8ec6571293cb8a5321176d74d559d2d38542 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Thu, 13 Feb 2014 08:10:42 +0100 Subject: net: axienet: Fix compilation error Add missing header to fix compilation error. drivers/net/ethernet/xilinx/xilinx_axienet_main.c:1575:22: error: undefined identifier 'irq_of_parse_and_map' drivers/net/ethernet/xilinx/xilinx_axienet_main.c:1576:22: error: undefined identifier 'irq_of_parse_and_map' Signed-off-by: Michal Simek Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 1ec65fe..9fb8ab2 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From 91ff37ff0b898e1ac8a1557b968a4918250f22ae Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Thu, 13 Feb 2014 08:10:43 +0100 Subject: net: axienet: Fix compilation warnings Warning log: xilinx_axienet_main.c: In function 'axienet_start_xmit_done': xilinx_axienet_main.c:617:16: warning: operation on 'lp->tx_bd_ci' may be undefined [-Wsequence-point] xilinx_axienet_main.c: In function 'axienet_start_xmit': xilinx_axienet_main.c:703:18: warning: operation on 'lp->tx_bd_tail' may be undefined [-Wsequence-point] xilinx_axienet_main.c:719:17: warning: operation on 'lp->tx_bd_tail' may be undefined [-Wsequence-point] xilinx_axienet_main.c: In function 'axienet_recv': xilinx_axienet_main.c:792:16: warning: operation on 'lp->rx_bd_ci' may be undefined [-Wsequence-point] xilinx_axienet_main.c: In function 'axienet_of_probe': xilinx_axienet_main.c:1501:21: warning: unused variable 'rc' [-Wunused-variable] Signed-off-by: Michal Simek Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 9fb8ab2..4bfdf8c 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -601,7 +601,8 @@ static void axienet_start_xmit_done(struct net_device *ndev) size += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK; packets++; - lp->tx_bd_ci = ++lp->tx_bd_ci % TX_BD_NUM; + ++lp->tx_bd_ci; + lp->tx_bd_ci %= TX_BD_NUM; cur_p = &lp->tx_bd_v[lp->tx_bd_ci]; status = cur_p->status; } @@ -687,7 +688,8 @@ static int axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_headlen(skb), DMA_TO_DEVICE); for (ii = 0; ii < num_frag; ii++) { - lp->tx_bd_tail = ++lp->tx_bd_tail % TX_BD_NUM; + ++lp->tx_bd_tail; + lp->tx_bd_tail %= TX_BD_NUM; cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; frag = &skb_shinfo(skb)->frags[ii]; cur_p->phys = dma_map_single(ndev->dev.parent, @@ -703,7 +705,8 @@ static int axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) tail_p = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * lp->tx_bd_tail; /* Start the transfer */ axienet_dma_out32(lp, XAXIDMA_TX_TDESC_OFFSET, tail_p); - lp->tx_bd_tail = ++lp->tx_bd_tail % TX_BD_NUM; + ++lp->tx_bd_tail; + lp->tx_bd_tail %= TX_BD_NUM; return NETDEV_TX_OK; } @@ -775,7 +778,8 @@ static void axienet_recv(struct net_device *ndev) cur_p->status = 0; cur_p->sw_id_offset = (u32) new_skb; - lp->rx_bd_ci = ++lp->rx_bd_ci % RX_BD_NUM; + ++lp->rx_bd_ci; + lp->rx_bd_ci %= RX_BD_NUM; cur_p = &lp->rx_bd_v[lp->rx_bd_ci]; } -- cgit v0.10.2 From 9b2b6a2d669c909dd0b125fc834da94bcfc0aee7 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Wed, 12 Feb 2014 15:55:14 +0100 Subject: net: qmi_wwan: add support for Cinterion PXS8 and PHS8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the PXS8 and PHS8 devices show up with PID 0x0053 they will expose both a QMI port and a WWAN interface. CC: Hans-Christoph Schemmel CC: Christian Schmiedl CC: Nicolaus Colberg CC: David McCullough Signed-off-by: Aleksander Morgado Acked-by: Bjørn Mork Signed-off-by: David S. Miller diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index ff5c871..1eddd43 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -732,6 +732,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1bc7, 0x1201, 2)}, /* Telit LE920 */ {QMI_FIXED_INTF(0x0b3c, 0xc005, 6)}, /* Olivetti Olicard 200 */ {QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */ + {QMI_FIXED_INTF(0x1e2d, 0x0053, 4)}, /* Cinterion PHxx,PXxx */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ -- cgit v0.10.2 From 89e101729be4cacd5847fef7e7c99680772c6e3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 12 Feb 2014 23:33:22 +0100 Subject: net: cpsw: catch of_get_phy_mode failing and propagate error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's wrong if the device tree doesn't provide a phy-mode property for the cpsw slaves as it is documented to be required in Documentation/devicetree/bindings/net/cpsw.txt. Anyhow it's nice to catch that problem, still more as it used to work without this property up to commit 388367a5a9fb (drivers: net: cpsw: use cpsw-phy-sel driver to configure phy mode) which is in v3.13-rc1. Signed-off-by: Uwe Kleine-König Acked-by: Mugunthan V N Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 1d860ce..542c511 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1896,6 +1896,11 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN); slave_data->phy_if = of_get_phy_mode(slave_node); + if (slave_data->phy_if < 0) { + pr_err("Missing or malformed slave[%d] phy-mode property\n", + i); + return slave_data->phy_if; + } if (data->dual_emac) { if (of_property_read_u32(slave_node, "dual_emac_res_vlan", -- cgit v0.10.2 From 219e288e8900fac65211e0a23e2a1037fd521af1 Mon Sep 17 00:00:00 2001 From: Vijay Subramanian Date: Wed, 12 Feb 2014 18:58:21 -0800 Subject: net: sched: Cleanup PIE comments Fix incorrect comment reported by Norbert Kiesel. Edit another comment to add more details. Also add references to algorithm (IETF draft and paper) to top of file. Signed-off-by: Vijay Subramanian CC: Mythili Prabhu CC: Norbert Kiesel Signed-off-by: David S. Miller diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index a255d02..fefeeb7 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -15,6 +15,11 @@ * * ECN support is added by Naeem Khademi * University of Oslo, Norway. + * + * References: + * IETF draft submission: http://tools.ietf.org/html/draft-pan-aqm-pie-00 + * IEEE Conference on High Performance Switching and Routing 2013 : + * "PIE: A * Lightweight Control Scheme to Address the Bufferbloat Problem" */ #include @@ -36,7 +41,7 @@ struct pie_params { psched_time_t target; /* user specified target delay in pschedtime */ u32 tupdate; /* timer frequency (in jiffies) */ u32 limit; /* number of packets that can be enqueued */ - u32 alpha; /* alpha and beta are between -4 and 4 */ + u32 alpha; /* alpha and beta are between 0 and 32 */ u32 beta; /* and are used for shift relative to 1 */ bool ecn; /* true if ecn is enabled */ bool bytemode; /* to scale drop early prob based on pkt size */ @@ -326,10 +331,16 @@ static void calculate_probability(struct Qdisc *sch) if (qdelay == 0 && qlen != 0) update_prob = false; - /* Add ranges for alpha and beta, more aggressive for high dropping - * mode and gentle steps for light dropping mode - * In light dropping mode, take gentle steps; in medium dropping mode, - * take medium steps; in high dropping mode, take big steps. + /* In the algorithm, alpha and beta are between 0 and 2 with typical + * value for alpha as 0.125. In this implementation, we use values 0-32 + * passed from user space to represent this. Also, alpha and beta have + * unit of HZ and need to be scaled before they can used to update + * probability. alpha/beta are updated locally below by 1) scaling them + * appropriately 2) scaling down by 16 to come to 0-2 range. + * Please see paper for details. + * + * We scale alpha and beta differently depending on whether we are in + * light, medium or high dropping mode. */ if (q->vars.prob < MAX_PROB / 100) { alpha = -- cgit v0.10.2 From 6726d971def461eded39cccce24d28f90a2e6df4 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 13 Feb 2014 16:09:58 +0900 Subject: USB2NET: SR9800: use %zu for size_t Use %zu for size_t in order to avoid the following build warning in printks. drivers/net/usb/sr9800.c: In function 'sr9800_bind' drivers/net/usb/sr9800.c:826:2: warning: format '%ld' expects argument of type 'long int' but argument 5 has type 'size_t' [-Wformat] Signed-off-by: Jingoo Han Signed-off-by: David S. Miller diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index 4175eb9..8017108 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -823,7 +823,7 @@ static int sr9800_bind(struct usbnet *dev, struct usb_interface *intf) dev->rx_urb_size = SR9800_BULKIN_SIZE[SR9800_MAX_BULKIN_2K].size; } - netdev_dbg(dev->net, "%s : setting rx_urb_size with : %ld\n", __func__, + netdev_dbg(dev->net, "%s : setting rx_urb_size with : %zu\n", __func__, dev->rx_urb_size); return 0; -- cgit v0.10.2 From 208ece14e23b167b43d906c74e3bdcbee8d6e4aa Mon Sep 17 00:00:00 2001 From: Liu Junliang Date: Thu, 13 Feb 2014 12:22:19 +0800 Subject: USB2NET: Fix Default to 'y' for SR9800 Device Driver, setting to 'n' Signed-off-by: Liu Junliang Signed-off-by: David S. Miller diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 409499f..7e7269f 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -296,7 +296,6 @@ config USB_NET_SR9800 tristate "CoreChip-sz SR9800 based USB 2.0 10/100 ethernet devices" depends on USB_USBNET select CRC32 - default y ---help--- Say Y if you want to use one of the following 100Mbps USB Ethernet device based on the CoreChip-sz SR9800 chip. -- cgit v0.10.2 From 0ad8b480d6ee916aa84324f69acf690142aecd0e Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 13 Feb 2014 11:42:05 +0200 Subject: vhost: fix ref cnt checking deadlock vhost checked the counter within the refcnt before decrementing. It really wanted to know that it is the one that has the last reference, as a way to batch freeing resources a bit more efficiently. Note: we only let refcount go to 0 on device release. This works well but we now access the ref counter twice so there's a race: all users might see a high count and decide to defer freeing resources. In the end no one initiates freeing resources until the last reference is gone (which is on VM shotdown so might happen after a looooong time). Let's do what we probably should have done straight away: switch from kref to plain atomic, documenting the semantics, return the refcount value atomically after decrement, then use that to avoid the deadlock. Reported-by: Qin Chuanyu Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9a68409..41be4de 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -70,7 +70,12 @@ enum { }; struct vhost_net_ubuf_ref { - struct kref kref; + /* refcount follows semantics similar to kref: + * 0: object is released + * 1: no outstanding ubufs + * >1: outstanding ubufs + */ + atomic_t refcount; wait_queue_head_t wait; struct vhost_virtqueue *vq; }; @@ -116,14 +121,6 @@ static void vhost_net_enable_zcopy(int vq) vhost_net_zcopy_mask |= 0x1 << vq; } -static void vhost_net_zerocopy_done_signal(struct kref *kref) -{ - struct vhost_net_ubuf_ref *ubufs; - - ubufs = container_of(kref, struct vhost_net_ubuf_ref, kref); - wake_up(&ubufs->wait); -} - static struct vhost_net_ubuf_ref * vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy) { @@ -134,21 +131,24 @@ vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy) ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL); if (!ubufs) return ERR_PTR(-ENOMEM); - kref_init(&ubufs->kref); + atomic_set(&ubufs->refcount, 1); init_waitqueue_head(&ubufs->wait); ubufs->vq = vq; return ubufs; } -static void vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs) +static int vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs) { - kref_put(&ubufs->kref, vhost_net_zerocopy_done_signal); + int r = atomic_sub_return(1, &ubufs->refcount); + if (unlikely(!r)) + wake_up(&ubufs->wait); + return r; } static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs) { - kref_put(&ubufs->kref, vhost_net_zerocopy_done_signal); - wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount)); + vhost_net_ubuf_put(ubufs); + wait_event(ubufs->wait, !atomic_read(&ubufs->refcount)); } static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs) @@ -306,22 +306,21 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) { struct vhost_net_ubuf_ref *ubufs = ubuf->ctx; struct vhost_virtqueue *vq = ubufs->vq; - int cnt = atomic_read(&ubufs->kref.refcount); + int cnt; /* set len to mark this desc buffers done DMA */ vq->heads[ubuf->desc].len = success ? VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; - vhost_net_ubuf_put(ubufs); + cnt = vhost_net_ubuf_put(ubufs); /* * Trigger polling thread if guest stopped submitting new buffers: - * in this case, the refcount after decrement will eventually reach 1 - * so here it is 2. + * in this case, the refcount after decrement will eventually reach 1. * We also trigger polling periodically after each 16 packets * (the value 16 here is more or less arbitrary, it's tuned to trigger * less than 10% of times). */ - if (cnt <= 2 || !(cnt % 16)) + if (cnt <= 1 || !(cnt % 16)) vhost_poll_queue(&vq->poll); } @@ -420,7 +419,7 @@ static void handle_tx(struct vhost_net *net) msg.msg_control = ubuf; msg.msg_controllen = sizeof(ubuf); ubufs = nvq->ubufs; - kref_get(&ubufs->kref); + atomic_inc(&ubufs->refcount); nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV; } else { msg.msg_control = NULL; @@ -780,7 +779,7 @@ static void vhost_net_flush(struct vhost_net *n) vhost_net_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs); mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = false; - kref_init(&n->vqs[VHOST_NET_VQ_TX].ubufs->kref); + atomic_set(&n->vqs[VHOST_NET_VQ_TX].ubufs->refcount, 1); mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); } } -- cgit v0.10.2 From b0c057ca7e835b36c6050c7627634b664796c1d6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 13 Feb 2014 11:45:11 +0200 Subject: vhost: fix a theoretical race in device cleanup vhost_zerocopy_callback accesses VQ right after it drops a ubuf reference. In theory, this could race with device removal which waits on the ubuf kref, and crash on use after free. Do all accesses within rcu read side critical section, and synchronize on release. Since callbacks are always invoked from bh, synchronize_rcu_bh seems enough and will help release complete a bit faster. Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 41be4de..a0fa5de 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -308,6 +308,8 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) struct vhost_virtqueue *vq = ubufs->vq; int cnt; + rcu_read_lock_bh(); + /* set len to mark this desc buffers done DMA */ vq->heads[ubuf->desc].len = success ? VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; @@ -322,6 +324,8 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) */ if (cnt <= 1 || !(cnt % 16)) vhost_poll_queue(&vq->poll); + + rcu_read_unlock_bh(); } /* Expects to be always run from workqueue - which acts as @@ -799,6 +803,8 @@ static int vhost_net_release(struct inode *inode, struct file *f) fput(tx_sock->file); if (rx_sock) fput(rx_sock->file); + /* Make sure no callbacks are outstanding */ + synchronize_rcu_bh(); /* We do an extra flush before freeing memory, * since jobs can re-queue themselves. */ vhost_net_flush(n); -- cgit v0.10.2 From 0d961b3b52f566f823070ce2366511a7f64b928c Mon Sep 17 00:00:00 2001 From: Heiko Schocher Date: Thu, 13 Feb 2014 14:47:27 +0100 Subject: drivers: net: cpsw: fix buggy loop condition Commit 0cd8f9cc0654c06adde353c6532114c5f53a18e8 ("drivers: net: cpsw: enable promiscuous mode support") Enable promiscuous mode support for CPSW. Introduced a crash on an am335x based board (similiar to am335x-evm). Reason is buggy end condition in for loop in cpsw_set_promiscious() for (i = 0; i <= priv->data.slaves; i++) should be for (i = 0; i < priv->data.slaves; i++) Fix this ... Signed-off-by: Heiko Schocher Cc: Mugunthan V N Cc: David S. Miller Cc: Sebastian Siewior Cc: Daniel Mack Cc: Felipe Balbi Cc: Markus Pargmann Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Acked-by: Mugunthan V N Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 542c511..651087b 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -554,7 +554,7 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable) * common for both the interface as the interface shares * the same hardware resource. */ - for (i = 0; i <= priv->data.slaves; i++) + for (i = 0; i < priv->data.slaves; i++) if (priv->slaves[i].ndev->flags & IFF_PROMISC) flag = true; @@ -578,7 +578,7 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable) unsigned long timeout = jiffies + HZ; /* Disable Learn for all ports */ - for (i = 0; i <= priv->data.slaves; i++) { + for (i = 0; i < priv->data.slaves; i++) { cpsw_ale_control_set(ale, i, ALE_PORT_NOLEARN, 1); cpsw_ale_control_set(ale, i, @@ -606,7 +606,7 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable) cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 0); /* Enable Learn for all ports */ - for (i = 0; i <= priv->data.slaves; i++) { + for (i = 0; i < priv->data.slaves; i++) { cpsw_ale_control_set(ale, i, ALE_PORT_NOLEARN, 0); cpsw_ale_control_set(ale, i, -- cgit v0.10.2 From 602b109942b816e56639a965ae81f3a2437004b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20S=C3=B8rensen?= Date: Thu, 13 Feb 2014 15:26:57 +0100 Subject: net:phy:dp83640: Move all HW initialization to dp83640_config_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit phy_init_hw not does a full PHY reset after the driver probe has finished, so any hw initialization done in the probe will be lost. Part of the timestamping functionality of the dp83640 is set up in the probe and with that lost, enabling timestamping will cause a PHY lockup, requiring a hard reset / power cycle to recover. This patch moves all the HW initialization in dp83640_probe to dp83640_config_init. Signed-off-by: Stefan Sørensen Signed-off-by: David S. Miller diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 9414fa2..98e7cbf 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1006,11 +1006,6 @@ static int dp83640_probe(struct phy_device *phydev) } else list_add_tail(&dp83640->list, &clock->phylist); - if (clock->chosen && !list_empty(&clock->phylist)) - recalibrate(clock); - else - enable_broadcast(dp83640->phydev, clock->page, 1); - dp83640_clock_put(clock); return 0; @@ -1063,6 +1058,14 @@ static void dp83640_remove(struct phy_device *phydev) static int dp83640_config_init(struct phy_device *phydev) { + struct dp83640_private *dp83640 = phydev->priv; + struct dp83640_clock *clock = dp83640->clock; + + if (clock->chosen && !list_empty(&clock->phylist)) + recalibrate(clock); + else + enable_broadcast(phydev, clock->page, 1); + enable_status_frames(phydev, true); ext_write(0, phydev, PAGE4, PTP_CTL, PTP_ENABLE); return 0; -- cgit v0.10.2 From 357137a4222b1984b3d867923c7e388669744ceb Mon Sep 17 00:00:00 2001 From: FX Le Bail Date: Mon, 10 Feb 2014 16:46:54 +0100 Subject: ipv4: ipconfig.c: add parentheses in an if statement Even if the 'time_before' macro expand with parentheses, the look is bad. Signed-off-by: Francois-Xavier Le Bail Signed-off-by: David S. Miller diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index efa1138..b3e86ea 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -273,7 +273,7 @@ static int __init ic_open_devs(void) msleep(1); - if time_before(jiffies, next_msg) + if (time_before(jiffies, next_msg)) continue; elapsed = jiffies_to_msecs(jiffies - start); -- cgit v0.10.2 From d43ff4cd798911736fb39025ec8004284b1b0bc2 Mon Sep 17 00:00:00 2001 From: Emil Goode Date: Thu, 13 Feb 2014 19:30:39 +0100 Subject: net: asix: add missing flag to struct driver_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct driver_info ax88178_info is assigned the function asix_rx_fixup_common as it's rx_fixup callback. This means that FLAG_MULTI_PACKET must be set as this function is cloning the data and calling usbnet_skb_return. Not setting this flag leads to usbnet_skb_return beeing called a second time from within the rx_process function in the usbnet module. Signed-off-by: Emil Goode Reported-by: Bjørn Mork Signed-off-by: David S. Miller diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 9765a7d..5d19409 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -917,7 +917,8 @@ static const struct driver_info ax88178_info = { .status = asix_status, .link_reset = ax88178_link_reset, .reset = ax88178_reset, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | + FLAG_MULTI_PACKET, .rx_fixup = asix_rx_fixup_common, .tx_fixup = asix_tx_fixup, }; -- cgit v0.10.2 From ce11c436720cdb8437340be93b2f5efe23ce3e1c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 13 Feb 2014 13:14:48 -0800 Subject: net: of_mdio: fix of_set_phy_supported after driver probing Commit 8fdade4 ("net: of_mdio: parse "max-speed" property to set PHY supported features") introduced a typo in of_set_phy_supported for the first assignment of phydev->supported which will not effectively limit the PHY device supported features bits if the PHY driver contains "higher" features (e.g: max-speed = <100> and PHY driver has PHY_GBIT_FEATURES set). Fix this by making sure that the very first thing is to reset to sane defaults (PHY_BASIC_FEATURES) and then progressively add speed features as we parse them. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 875b7b6..495facc 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -24,7 +24,11 @@ MODULE_LICENSE("GPL"); static void of_set_phy_supported(struct phy_device *phydev, u32 max_speed) { - phydev->supported |= PHY_DEFAULT_FEATURES; + /* The default values for phydev->supported are provided by the PHY + * driver "features" member, we want to reset to sane defaults fist + * before supporting higher speeds. + */ + phydev->supported &= PHY_DEFAULT_FEATURES; switch (max_speed) { default: -- cgit v0.10.2 From eb2d4c64879c99fc78e739b0147db28731ff474e Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 14 Feb 2014 08:21:04 +0100 Subject: net,bonding: fix bond_options.c direct rwlock.h include drivers/net/bonding/bond_options.c includes rwlock.h directly, which is a nono, and which also breaks RT kernel build. Signed-off-by: Mike Galbraith Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 11cb943..c378784 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v0.10.2 From 09db30805300e9ed5ad43d4d339115cf1d9c84e1 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 13 Feb 2014 19:02:33 -0700 Subject: dccp: re-enable debug macro dccp tfrc: revert This reverts 6aee49c558de ("dccp: make local variable static") since the variable tfrc_debug is referenced by the tfrc_pr_debug(fmt, ...) macro when TFRC debugging is enabled. If it is enabled, use of the macro produces a compilation error. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index c073b81..62b5828 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -8,7 +8,7 @@ #include "tfrc.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG -static bool tfrc_debug; +bool tfrc_debug; module_param(tfrc_debug, bool, 0644); MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages"); #endif diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index a3d8f7c..40ee7d6 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -21,6 +21,7 @@ #include "packet_history.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG +extern bool tfrc_debug; #define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a) #else #define tfrc_pr_debug(format, a...) -- cgit v0.10.2 From 891de74d693bb4fefe2efcc6432a4a9a9bee561e Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 12 Feb 2014 16:54:27 -0800 Subject: hyperv: Fix the carrier status setting Without this patch, the "cat /sys/class/net/ethN/operstate" shows "unknown", and "ethtool ethN" shows "Link detected: yes", when VM boots up with or without vNIC connected. This patch fixed the problem. Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Acked-by: Jason Wang Signed-off-by: David S. Miller diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 7756118..7141a19 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -88,8 +88,12 @@ static int netvsc_open(struct net_device *net) { struct net_device_context *net_device_ctx = netdev_priv(net); struct hv_device *device_obj = net_device_ctx->device_ctx; + struct netvsc_device *nvdev; + struct rndis_device *rdev; int ret = 0; + netif_carrier_off(net); + /* Open up the device */ ret = rndis_filter_open(device_obj); if (ret != 0) { @@ -99,6 +103,11 @@ static int netvsc_open(struct net_device *net) netif_start_queue(net); + nvdev = hv_get_drvdata(device_obj); + rdev = nvdev->extension; + if (!rdev->link_state) + netif_carrier_on(net); + return ret; } @@ -229,23 +238,24 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, struct net_device *net; struct net_device_context *ndev_ctx; struct netvsc_device *net_device; + struct rndis_device *rdev; net_device = hv_get_drvdata(device_obj); + rdev = net_device->extension; + + rdev->link_state = status != 1; + net = net_device->ndev; - if (!net) { - netdev_err(net, "got link status but net device " - "not initialized yet\n"); + if (!net || net->reg_state != NETREG_REGISTERED) return; - } + ndev_ctx = netdev_priv(net); if (status == 1) { - netif_carrier_on(net); - ndev_ctx = netdev_priv(net); schedule_delayed_work(&ndev_ctx->dwork, 0); schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20)); } else { - netif_carrier_off(net); + schedule_delayed_work(&ndev_ctx->dwork, 0); } } @@ -388,17 +398,35 @@ static const struct net_device_ops device_ops = { * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add * another netif_notify_peers() into a delayed work, otherwise GARP packet * will not be sent after quick migration, and cause network disconnection. + * Also, we update the carrier status here. */ -static void netvsc_send_garp(struct work_struct *w) +static void netvsc_link_change(struct work_struct *w) { struct net_device_context *ndev_ctx; struct net_device *net; struct netvsc_device *net_device; + struct rndis_device *rdev; + bool notify; + + rtnl_lock(); ndev_ctx = container_of(w, struct net_device_context, dwork.work); net_device = hv_get_drvdata(ndev_ctx->device_ctx); + rdev = net_device->extension; net = net_device->ndev; - netdev_notify_peers(net); + + if (rdev->link_state) { + netif_carrier_off(net); + notify = false; + } else { + netif_carrier_on(net); + notify = true; + } + + rtnl_unlock(); + + if (notify) + netdev_notify_peers(net); } @@ -414,13 +442,10 @@ static int netvsc_probe(struct hv_device *dev, if (!net) return -ENOMEM; - /* Set initial state */ - netif_carrier_off(net); - net_device_ctx = netdev_priv(net); net_device_ctx->device_ctx = dev; hv_set_drvdata(dev, net); - INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_send_garp); + INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); INIT_WORK(&net_device_ctx->work, do_set_multicast); net->netdev_ops = &device_ops; @@ -443,8 +468,6 @@ static int netvsc_probe(struct hv_device *dev, } memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); - netif_carrier_on(net); - ret = register_netdev(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); -- cgit v0.10.2 From cd0f0b95fd2cd2b716caf5f15db73ab76992789b Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Fri, 14 Feb 2014 18:26:22 +0800 Subject: ipv4: distinguish EHOSTUNREACH from the ENETUNREACH since commit 251da413("ipv4: Cache ip_error() routes even when not forwarding."), the counter IPSTATS_MIB_INADDRERRORS can't work correctly, because the value of err was always set to ENETUNREACH. Signed-off-by: Duan Jiong Signed-off-by: David S. Miller diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 25071b4..6f6dd85 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1695,8 +1695,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.daddr = daddr; fl4.saddr = saddr; err = fib_lookup(net, &fl4, &res); - if (err != 0) + if (err != 0) { + if (!IN_DEV_FORWARD(in_dev)) + err = -EHOSTUNREACH; goto no_route; + } RT_CACHE_STAT_INC(in_slow_tot); @@ -1712,8 +1715,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto local_input; } - if (!IN_DEV_FORWARD(in_dev)) + if (!IN_DEV_FORWARD(in_dev)) { + err = -EHOSTUNREACH; goto no_route; + } if (res.type != RTN_UNICAST) goto martian_destination; -- cgit v0.10.2 From ef2820a735f74ea60335f8ba3801b844f0cb184d Mon Sep 17 00:00:00 2001 From: Matija Glavinic Pecotic Date: Fri, 14 Feb 2014 14:51:18 +0100 Subject: net: sctp: Fix a_rwnd/rwnd management to reflect real state of the receiver's buffer Implementation of (a)rwnd calculation might lead to severe performance issues and associations completely stalling. These problems are described and solution is proposed which improves lksctp's robustness in congestion state. 1) Sudden drop of a_rwnd and incomplete window recovery afterwards Data accounted in sctp_assoc_rwnd_decrease takes only payload size (sctp data), but size of sk_buff, which is blamed against receiver buffer, is not accounted in rwnd. Theoretically, this should not be the problem as actual size of buffer is double the amount requested on the socket (SO_RECVBUF). Problem here is that this will have bad scaling for data which is less then sizeof sk_buff. E.g. in 4G (LTE) networks, link interfacing radio side will have a large portion of traffic of this size (less then 100B). An example of sudden drop and incomplete window recovery is given below. Node B exhibits problematic behavior. Node A initiates association and B is configured to advertise rwnd of 10000. A sends messages of size 43B (size of typical sctp message in 4G (LTE) network). On B data is left in buffer by not reading socket in userspace. Lets examine when we will hit pressure state and declare rwnd to be 0 for scenario with above stated parameters (rwnd == 10000, chunk size == 43, each chunk is sent in separate sctp packet) Logic is implemented in sctp_assoc_rwnd_decrease: socket_buffer (see below) is maximum size which can be held in socket buffer (sk_rcvbuf). current_alloced is amount of data currently allocated (rx_count) A simple expression is given for which it will be examined after how many packets for above stated parameters we enter pressure state: We start by condition which has to be met in order to enter pressure state: socket_buffer < currently_alloced; currently_alloced is represented as size of sctp packets received so far and not yet delivered to userspace. x is the number of chunks/packets (since there is no bundling, and each chunk is delivered in separate packet, we can observe each chunk also as sctp packet, and what is important here, having its own sk_buff): socket_buffer < x*each_sctp_packet; each_sctp_packet is sctp chunk size + sizeof(struct sk_buff). socket_buffer is twice the amount of initially requested size of socket buffer, which is in case of sctp, twice the a_rwnd requested: 2*rwnd < x*(payload+sizeof(struc sk_buff)); sizeof(struct sk_buff) is 190 (3.13.0-rc4+). Above is stated that rwnd is 10000 and each payload size is 43 20000 < x(43+190); x > 20000/233; x ~> 84; After ~84 messages, pressure state is entered and 0 rwnd is advertised while received 84*43B ~= 3612B sctp data. This is why external observer notices sudden drop from 6474 to 0, as it will be now shown in example: IP A.34340 > B.12345: sctp (1) [INIT] [init tag: 1875509148] [rwnd: 81920] [OS: 10] [MIS: 65535] [init TSN: 1096057017] IP B.12345 > A.34340: sctp (1) [INIT ACK] [init tag: 3198966556] [rwnd: 10000] [OS: 10] [MIS: 10] [init TSN: 902132839] IP A.34340 > B.12345: sctp (1) [COOKIE ECHO] IP B.12345 > A.34340: sctp (1) [COOKIE ACK] IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057017] [SID: 0] [SSEQ 0] [PPID 0x18] IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057017] [a_rwnd 9957] [#gap acks 0] [#dup tsns 0] IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057018] [SID: 0] [SSEQ 1] [PPID 0x18] IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057018] [a_rwnd 9957] [#gap acks 0] [#dup tsns 0] IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057019] [SID: 0] [SSEQ 2] [PPID 0x18] IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057019] [a_rwnd 9914] [#gap acks 0] [#dup tsns 0] <...> IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057098] [SID: 0] [SSEQ 81] [PPID 0x18] IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057098] [a_rwnd 6517] [#gap acks 0] [#dup tsns 0] IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057099] [SID: 0] [SSEQ 82] [PPID 0x18] IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057099] [a_rwnd 6474] [#gap acks 0] [#dup tsns 0] IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057100] [SID: 0] [SSEQ 83] [PPID 0x18] --> Sudden drop IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057100] [a_rwnd 0] [#gap acks 0] [#dup tsns 0] At this point, rwnd_press stores current rwnd value so it can be later restored in sctp_assoc_rwnd_increase. This however doesn't happen as condition to start slowly increasing rwnd until rwnd_press is returned to rwnd is never met. This condition is not met since rwnd, after it hit 0, must first reach rwnd_press by adding amount which is read from userspace. Let us observe values in above example. Initial a_rwnd is 10000, pressure was hit when rwnd was ~6500 and the amount of actual sctp data currently waiting to be delivered to userspace is ~3500. When userspace starts to read, sctp_assoc_rwnd_increase will be blamed only for sctp data, which is ~3500. Condition is never met, and when userspace reads all data, rwnd stays on 3569. IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057100] [a_rwnd 1505] [#gap acks 0] [#dup tsns 0] IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057100] [a_rwnd 3010] [#gap acks 0] [#dup tsns 0] IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057101] [SID: 0] [SSEQ 84] [PPID 0x18] IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057101] [a_rwnd 3569] [#gap acks 0] [#dup tsns 0] --> At this point userspace read everything, rwnd recovered only to 3569 IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057102] [SID: 0] [SSEQ 85] [PPID 0x18] IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057102] [a_rwnd 3569] [#gap acks 0] [#dup tsns 0] Reproduction is straight forward, it is enough for sender to send packets of size less then sizeof(struct sk_buff) and receiver keeping them in its buffers. 2) Minute size window for associations sharing the same socket buffer In case multiple associations share the same socket, and same socket buffer (sctp.rcvbuf_policy == 0), different scenarios exist in which congestion on one of the associations can permanently drop rwnd of other association(s). Situation will be typically observed as one association suddenly having rwnd dropped to size of last packet received and never recovering beyond that point. Different scenarios will lead to it, but all have in common that one of the associations (let it be association from 1)) nearly depleted socket buffer, and the other association blames socket buffer just for the amount enough to start the pressure. This association will enter pressure state, set rwnd_press and announce 0 rwnd. When data is read by userspace, similar situation as in 1) will occur, rwnd will increase just for the size read by userspace but rwnd_press will be high enough so that association doesn't have enough credit to reach rwnd_press and restore to previous state. This case is special case of 1), being worse as there is, in the worst case, only one packet in buffer for which size rwnd will be increased. Consequence is association which has very low maximum rwnd ('minute size', in our case down to 43B - size of packet which caused pressure) and as such unusable. Scenario happened in the field and labs frequently after congestion state (link breaks, different probabilities of packet drop, packet reordering) and with scenario 1) preceding. Here is given a deterministic scenario for reproduction: >From node A establish two associations on the same socket, with rcvbuf_policy being set to share one common buffer (sctp.rcvbuf_policy == 0). On association 1 repeat scenario from 1), that is, bring it down to 0 and restore up. Observe scenario 1). Use small payload size (here we use 43). Once rwnd is 'recovered', bring it down close to 0, as in just one more packet would close it. This has as a consequence that association number 2 is able to receive (at least) one more packet which will bring it in pressure state. E.g. if association 2 had rwnd of 10000, packet received was 43, and we enter at this point into pressure, rwnd_press will have 9957. Once payload is delivered to userspace, rwnd will increase for 43, but conditions to restore rwnd to original state, just as in 1), will never be satisfied. --> Association 1, between A.y and B.12345 IP A.55915 > B.12345: sctp (1) [INIT] [init tag: 836880897] [rwnd: 10000] [OS: 10] [MIS: 65535] [init TSN: 4032536569] IP B.12345 > A.55915: sctp (1) [INIT ACK] [init tag: 2873310749] [rwnd: 81920] [OS: 10] [MIS: 10] [init TSN: 3799315613] IP A.55915 > B.12345: sctp (1) [COOKIE ECHO] IP B.12345 > A.55915: sctp (1) [COOKIE ACK] --> Association 2, between A.z and B.12346 IP A.55915 > B.12346: sctp (1) [INIT] [init tag: 534798321] [rwnd: 10000] [OS: 10] [MIS: 65535] [init TSN: 2099285173] IP B.12346 > A.55915: sctp (1) [INIT ACK] [init tag: 516668823] [rwnd: 81920] [OS: 10] [MIS: 10] [init TSN: 3676403240] IP A.55915 > B.12346: sctp (1) [COOKIE ECHO] IP B.12346 > A.55915: sctp (1) [COOKIE ACK] --> Deplete socket buffer by sending messages of size 43B over association 1 IP B.12345 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3799315613] [SID: 0] [SSEQ 0] [PPID 0x18] IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315613] [a_rwnd 9957] [#gap acks 0] [#dup tsns 0] <...> IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315696] [a_rwnd 6388] [#gap acks 0] [#dup tsns 0] IP B.12345 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3799315697] [SID: 0] [SSEQ 84] [PPID 0x18] IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315697] [a_rwnd 6345] [#gap acks 0] [#dup tsns 0] --> Sudden drop on 1 IP B.12345 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3799315698] [SID: 0] [SSEQ 85] [PPID 0x18] IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315698] [a_rwnd 0] [#gap acks 0] [#dup tsns 0] --> Here userspace read, rwnd 'recovered' to 3698, now deplete again using association 1 so there is place in buffer for only one more packet IP B.12345 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3799315799] [SID: 0] [SSEQ 186] [PPID 0x18] IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315799] [a_rwnd 86] [#gap acks 0] [#dup tsns 0] IP B.12345 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3799315800] [SID: 0] [SSEQ 187] [PPID 0x18] IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315800] [a_rwnd 43] [#gap acks 0] [#dup tsns 0] --> Socket buffer is almost depleted, but there is space for one more packet, send them over association 2, size 43B IP B.12346 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3676403240] [SID: 0] [SSEQ 0] [PPID 0x18] IP A.55915 > B.12346: sctp (1) [SACK] [cum ack 3676403240] [a_rwnd 0] [#gap acks 0] [#dup tsns 0] --> Immediate drop IP A.60995 > B.12346: sctp (1) [SACK] [cum ack 387491510] [a_rwnd 0] [#gap acks 0] [#dup tsns 0] --> Read everything from the socket, both association recover up to maximum rwnd they are capable of reaching, note that association 1 recovered up to 3698, and association 2 recovered only to 43 IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315800] [a_rwnd 1548] [#gap acks 0] [#dup tsns 0] IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315800] [a_rwnd 3053] [#gap acks 0] [#dup tsns 0] IP B.12345 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3799315801] [SID: 0] [SSEQ 188] [PPID 0x18] IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315801] [a_rwnd 3698] [#gap acks 0] [#dup tsns 0] IP B.12346 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3676403241] [SID: 0] [SSEQ 1] [PPID 0x18] IP A.55915 > B.12346: sctp (1) [SACK] [cum ack 3676403241] [a_rwnd 43] [#gap acks 0] [#dup tsns 0] A careful reader might wonder why it is necessary to reproduce 1) prior reproduction of 2). It is simply easier to observe when to send packet over association 2 which will push association into the pressure state. Proposed solution: Both problems share the same root cause, and that is improper scaling of socket buffer with rwnd. Solution in which sizeof(sk_buff) is taken into concern while calculating rwnd is not possible due to fact that there is no linear relationship between amount of data blamed in increase/decrease with IP packet in which payload arrived. Even in case such solution would be followed, complexity of the code would increase. Due to nature of current rwnd handling, slow increase (in sctp_assoc_rwnd_increase) of rwnd after pressure state is entered is rationale, but it gives false representation to the sender of current buffer space. Furthermore, it implements additional congestion control mechanism which is defined on implementation, and not on standard basis. Proposed solution simplifies whole algorithm having on mind definition from rfc: o Receiver Window (rwnd): This gives the sender an indication of the space available in the receiver's inbound buffer. Core of the proposed solution is given with these lines: sctp_assoc_rwnd_update: if ((asoc->base.sk->sk_rcvbuf - rx_count) > 0) asoc->rwnd = (asoc->base.sk->sk_rcvbuf - rx_count) >> 1; else asoc->rwnd = 0; We advertise to sender (half of) actual space we have. Half is in the braces depending whether you would like to observe size of socket buffer as SO_RECVBUF or twice the amount, i.e. size is the one visible from userspace, that is, from kernelspace. In this way sender is given with good approximation of our buffer space, regardless of the buffer policy - we always advertise what we have. Proposed solution fixes described problems and removes necessity for rwnd restoration algorithm. Finally, as proposed solution is simplification, some lines of code, along with some bytes in struct sctp_association are saved. Version 2 of the patch addressed comments from Vlad. Name of the function is set to be more descriptive, and two parts of code are changed, in one removing the superfluous call to sctp_assoc_rwnd_update since call would not result in update of rwnd, and the other being reordering of the code in a way that call to sctp_assoc_rwnd_update updates rwnd. Version 3 corrected change introduced in v2 in a way that existing function is not reordered/copied in line, but it is correctly called. Thanks Vlad for suggesting. Signed-off-by: Matija Glavinic Pecotic Reviewed-by: Alexander Sverdlin Acked-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d992ca3..6ee76c8 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1653,17 +1653,6 @@ struct sctp_association { /* This is the last advertised value of rwnd over a SACK chunk. */ __u32 a_rwnd; - /* Number of bytes by which the rwnd has slopped. The rwnd is allowed - * to slop over a maximum of the association's frag_point. - */ - __u32 rwnd_over; - - /* Keeps treack of rwnd pressure. This happens when we have - * a window, but not recevie buffer (i.e small packets). This one - * is releases slowly (1 PMTU at a time ). - */ - __u32 rwnd_press; - /* This is the sndbuf size in use for the association. * This corresponds to the sndbuf size for the association, * as specified in the sk->sndbuf. @@ -1892,8 +1881,7 @@ void sctp_assoc_update(struct sctp_association *old, __u32 sctp_association_get_next_tsn(struct sctp_association *); void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *); -void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int); -void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int); +void sctp_assoc_rwnd_update(struct sctp_association *, bool); void sctp_assoc_set_primary(struct sctp_association *, struct sctp_transport *); void sctp_assoc_del_nonprimary_peers(struct sctp_association *, diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 5ae6092..f558433 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1367,44 +1367,35 @@ static inline bool sctp_peer_needs_update(struct sctp_association *asoc) return false; } -/* Increase asoc's rwnd by len and send any window update SACK if needed. */ -void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) +/* Update asoc's rwnd for the approximated state in the buffer, + * and check whether SACK needs to be sent. + */ +void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer) { + int rx_count; struct sctp_chunk *sack; struct timer_list *timer; - if (asoc->rwnd_over) { - if (asoc->rwnd_over >= len) { - asoc->rwnd_over -= len; - } else { - asoc->rwnd += (len - asoc->rwnd_over); - asoc->rwnd_over = 0; - } - } else { - asoc->rwnd += len; - } + if (asoc->ep->rcvbuf_policy) + rx_count = atomic_read(&asoc->rmem_alloc); + else + rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc); - /* If we had window pressure, start recovering it - * once our rwnd had reached the accumulated pressure - * threshold. The idea is to recover slowly, but up - * to the initial advertised window. - */ - if (asoc->rwnd_press && asoc->rwnd >= asoc->rwnd_press) { - int change = min(asoc->pathmtu, asoc->rwnd_press); - asoc->rwnd += change; - asoc->rwnd_press -= change; - } + if ((asoc->base.sk->sk_rcvbuf - rx_count) > 0) + asoc->rwnd = (asoc->base.sk->sk_rcvbuf - rx_count) >> 1; + else + asoc->rwnd = 0; - pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n", - __func__, asoc, len, asoc->rwnd, asoc->rwnd_over, - asoc->a_rwnd); + pr_debug("%s: asoc:%p rwnd=%u, rx_count=%d, sk_rcvbuf=%d\n", + __func__, asoc, asoc->rwnd, rx_count, + asoc->base.sk->sk_rcvbuf); /* Send a window update SACK if the rwnd has increased by at least the * minimum of the association's PMTU and half of the receive buffer. * The algorithm used is similar to the one described in * Section 4.2.3.3 of RFC 1122. */ - if (sctp_peer_needs_update(asoc)) { + if (update_peer && sctp_peer_needs_update(asoc)) { asoc->a_rwnd = asoc->rwnd; pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u " @@ -1426,45 +1417,6 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) } } -/* Decrease asoc's rwnd by len. */ -void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) -{ - int rx_count; - int over = 0; - - if (unlikely(!asoc->rwnd || asoc->rwnd_over)) - pr_debug("%s: association:%p has asoc->rwnd:%u, " - "asoc->rwnd_over:%u!\n", __func__, asoc, - asoc->rwnd, asoc->rwnd_over); - - if (asoc->ep->rcvbuf_policy) - rx_count = atomic_read(&asoc->rmem_alloc); - else - rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc); - - /* If we've reached or overflowed our receive buffer, announce - * a 0 rwnd if rwnd would still be positive. Store the - * the potential pressure overflow so that the window can be restored - * back to original value. - */ - if (rx_count >= asoc->base.sk->sk_rcvbuf) - over = 1; - - if (asoc->rwnd >= len) { - asoc->rwnd -= len; - if (over) { - asoc->rwnd_press += asoc->rwnd; - asoc->rwnd = 0; - } - } else { - asoc->rwnd_over = len - asoc->rwnd; - asoc->rwnd = 0; - } - - pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n", - __func__, asoc, len, asoc->rwnd, asoc->rwnd_over, - asoc->rwnd_press); -} /* Build the bind address list for the association based on info from the * local endpoint and the remote peer. diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 483dcd7..591b44d 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -6176,7 +6176,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * PMTU. In cases, such as loopback, this might be a rather * large spill over. */ - if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over || + if ((!chunk->data_accepted) && (!asoc->rwnd || (datalen > asoc->rwnd + asoc->frag_point))) { /* If this is the next TSN, consider reneging to make diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9e91d6e..7075ac8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2092,12 +2092,6 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, sctp_skb_pull(skb, copied); skb_queue_head(&sk->sk_receive_queue, skb); - /* When only partial message is copied to the user, increase - * rwnd by that amount. If all the data in the skb is read, - * rwnd is updated when the event is freed. - */ - if (!sctp_ulpevent_is_notification(event)) - sctp_assoc_rwnd_increase(event->asoc, copied); goto out; } else if ((event->msg_flags & MSG_NOTIFICATION) || (event->msg_flags & MSG_EOR)) diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 85c6465..8d198ae 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -989,7 +989,7 @@ static void sctp_ulpevent_receive_data(struct sctp_ulpevent *event, skb = sctp_event2skb(event); /* Set the owner and charge rwnd for bytes received. */ sctp_ulpevent_set_owner(event, asoc); - sctp_assoc_rwnd_decrease(asoc, skb_headlen(skb)); + sctp_assoc_rwnd_update(asoc, false); if (!skb->data_len) return; @@ -1011,6 +1011,7 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event) { struct sk_buff *skb, *frag; unsigned int len; + struct sctp_association *asoc; /* Current stack structures assume that the rcv buffer is * per socket. For UDP style sockets this is not true as @@ -1035,8 +1036,11 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event) } done: - sctp_assoc_rwnd_increase(event->asoc, len); + asoc = event->asoc; + sctp_association_hold(asoc); sctp_ulpevent_release_owner(event); + sctp_assoc_rwnd_update(asoc, true); + sctp_association_put(asoc); } static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event) -- cgit v0.10.2 From c321f7d7c87cdc623c87845f6378620573e57512 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 14 Feb 2014 15:32:20 +0100 Subject: drivers/net: tulip_remove_one needs to call pci_disable_device() Otherwise the device is not completely shut down. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index add05f1..1642de7 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1939,6 +1939,7 @@ static void tulip_remove_one(struct pci_dev *pdev) pci_iounmap(pdev, tp->base_addr); free_netdev (dev); pci_release_regions (pdev); + pci_disable_device(pdev); /* pci_power_off (pdev, -1); */ } -- cgit v0.10.2 From 99932d4fc03a13bb3e94938fe25458fabc8f2fc3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 16 Feb 2014 15:55:20 +0100 Subject: netdevice: add queue selection fallback handler for ndo_select_queue Add a new argument for ndo_select_queue() callback that passes a fallback handler. This gets invoked through netdev_pick_tx(); fallback handler is currently __netdev_pick_tx() as most drivers invoke this function within their customized implementation in case for skbs that don't need any special handling. This fallback handler can then be replaced on other call-sites with different queue selection methods (e.g. in packet sockets, pktgen etc). This also has the nice side-effect that __netdev_pick_tx() is then only invoked from netdev_pick_tx() and export of that function to modules can be undone. Suggested-by: David S. Miller Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8676649..1c6104d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3707,7 +3707,7 @@ static inline int bond_slave_override(struct bonding *bond, static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, select_queue_fallback_t fallback) { /* * This helper function exists to help dev_pick_tx get the correct diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 9d7419e..66c0df7 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -1873,7 +1873,7 @@ void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw) } u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, select_queue_fallback_t fallback) { struct bnx2x *bp = netdev_priv(dev); @@ -1895,7 +1895,7 @@ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb, } /* select a non-FCoE queue */ - return __netdev_pick_tx(dev, skb) % BNX2X_NUM_ETH_QUEUES(bp); + return fallback(dev, skb) % BNX2X_NUM_ETH_QUEUES(bp); } void bnx2x_set_num_queues(struct bnx2x *bp) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index bfc58d4..a89a40f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -496,7 +496,7 @@ int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos); /* select_queue callback */ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv); + void *accel_priv, select_queue_fallback_t fallback); static inline void bnx2x_update_rx_prod(struct bnx2x *bp, struct bnx2x_fastpath *fp, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 6d4ada7..18076c4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6881,7 +6881,7 @@ static inline int ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size) } static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, select_queue_fallback_t fallback) { struct ixgbe_fwd_adapter *fwd_adapter = accel_priv; #ifdef IXGBE_FCOE @@ -6907,7 +6907,7 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb, if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) break; default: - return __netdev_pick_tx(dev, skb); + return fallback(dev, skb); } f = &adapter->ring_feature[RING_F_FCOE]; @@ -6920,7 +6920,7 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb, return txq + f->offset; #else - return __netdev_pick_tx(dev, skb); + return fallback(dev, skb); #endif } diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 8f9266c..fd4b6ae 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -619,7 +619,7 @@ ltq_etop_set_multicast_list(struct net_device *dev) static u16 ltq_etop_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, select_queue_fallback_t fallback) { /* we are currently only using the first queue */ return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 8e8a7eb..1345703 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -629,7 +629,7 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *sk } u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, select_queue_fallback_t fallback) { struct mlx4_en_priv *priv = netdev_priv(dev); u16 rings_p_up = priv->num_tx_rings_p_up; @@ -641,7 +641,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, if (vlan_tx_tag_present(skb)) up = vlan_tx_tag_get(skb) >> VLAN_PRIO_SHIFT; - return __netdev_pick_tx(dev, skb) % rings_p_up + up * rings_p_up; + return fallback(dev, skb) % rings_p_up + up * rings_p_up; } static void mlx4_bf_copy(void __iomem *dst, unsigned long *src, unsigned bytecnt) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 3af04c3..9ca223b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -723,7 +723,7 @@ int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv); + void *accel_priv, select_queue_fallback_t fallback); netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c index 023237a..17503da 100644 --- a/drivers/net/ethernet/tile/tilegx.c +++ b/drivers/net/ethernet/tile/tilegx.c @@ -2071,7 +2071,7 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) /* Return subqueue id on this core (one per core). */ static u16 tile_net_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, select_queue_fallback_t fallback) { return smp_processor_id(); } diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 2840742..c8624a8 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1648,7 +1648,7 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev) } static u16 team_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, select_queue_fallback_t fallback) { /* * This helper function exists to help dev_pick_tx get the correct diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 44c4db8..8fe9cb7 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -366,7 +366,7 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) * hope the rxq no. may help here. */ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, select_queue_fallback_t fallback) { struct tun_struct *tun = netdev_priv(dev); struct tun_flow_entry *e; diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c index 4d79761..9d3d275 100644 --- a/drivers/net/wireless/mwifiex/main.c +++ b/drivers/net/wireless/mwifiex/main.c @@ -748,7 +748,7 @@ static struct net_device_stats *mwifiex_get_stats(struct net_device *dev) static u16 mwifiex_netdev_select_wmm_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, select_queue_fallback_t fallback) { skb->priority = cfg80211_classify8021d(skb, NULL); return mwifiex_1d_to_wmm_queue[skb->priority]; diff --git a/drivers/staging/bcm/Bcmnet.c b/drivers/staging/bcm/Bcmnet.c index 8dfdd27..95a2358 100644 --- a/drivers/staging/bcm/Bcmnet.c +++ b/drivers/staging/bcm/Bcmnet.c @@ -40,7 +40,7 @@ static INT bcm_close(struct net_device *dev) } static u16 bcm_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, select_queue_fallback_t fallback) { return ClassifyPacket(netdev_priv(dev), skb); } diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c index eedffed..6f9ac27 100644 --- a/drivers/staging/netlogic/xlr_net.c +++ b/drivers/staging/netlogic/xlr_net.c @@ -307,7 +307,7 @@ static netdev_tx_t xlr_net_start_xmit(struct sk_buff *skb, } static u16 xlr_net_select_queue(struct net_device *ndev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, select_queue_fallback_t fallback) { return (u16)smp_processor_id(); } diff --git a/drivers/staging/rtl8188eu/os_dep/os_intfs.c b/drivers/staging/rtl8188eu/os_dep/os_intfs.c index 68f98fa..7c9ee58 100644 --- a/drivers/staging/rtl8188eu/os_dep/os_intfs.c +++ b/drivers/staging/rtl8188eu/os_dep/os_intfs.c @@ -653,7 +653,7 @@ static unsigned int rtw_classify8021d(struct sk_buff *skb) } static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, select_queue_fallback_t fallback) { struct adapter *padapter = rtw_netdev_priv(dev); struct mlme_priv *pmlmepriv = &padapter->mlmepriv; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 21d4e6b..1de9c13 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -752,6 +752,9 @@ struct netdev_phys_port_id { unsigned char id_len; }; +typedef u16 (*select_queue_fallback_t)(struct net_device *dev, + struct sk_buff *skb); + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -783,7 +786,7 @@ struct netdev_phys_port_id { * Required can not be NULL. * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, - * void *accel_priv); + * void *accel_priv, select_queue_fallback_t fallback); * Called to decide which queue to when device supports multiple * transmit queues. * @@ -1005,7 +1008,8 @@ struct net_device_ops { struct net_device *dev); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, - void *accel_priv); + void *accel_priv, + select_queue_fallback_t fallback); void (*ndo_change_rx_flags)(struct net_device *dev, int flags); void (*ndo_set_rx_mode)(struct net_device *dev); @@ -1551,7 +1555,6 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, struct netdev_queue *netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, void *accel_priv); -u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb); /* * Net namespace inlines diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 87577d4..75fe83f 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -372,7 +372,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) #endif } -u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) +static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) { struct sock *sk = skb->sk; int queue_index = sk_tx_queue_get(sk); @@ -392,7 +392,6 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) return queue_index; } -EXPORT_SYMBOL(__netdev_pick_tx); struct netdev_queue *netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, @@ -403,8 +402,8 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, if (dev->real_num_tx_queues != 1) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) - queue_index = ops->ndo_select_queue(dev, skb, - accel_priv); + queue_index = ops->ndo_select_queue(dev, skb, accel_priv, + __netdev_pick_tx); else queue_index = __netdev_pick_tx(dev, skb); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d6d1f1d..ce1c443 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1057,7 +1057,8 @@ static void ieee80211_uninit(struct net_device *dev) static u16 ieee80211_netdev_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, + select_queue_fallback_t fallback) { return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb); } @@ -1075,7 +1076,8 @@ static const struct net_device_ops ieee80211_dataif_ops = { static u16 ieee80211_monitor_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + void *accel_priv, + select_queue_fallback_t fallback) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; -- cgit v0.10.2 From b9507bdaf40e91fea2b1c0c1ee7dc627c8ee6fd6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 16 Feb 2014 15:55:21 +0100 Subject: netdevice: move netdev_cap_txqueue for shared usage to header In order to allow users to invoke netdev_cap_txqueue, it needs to be moved into netdevice.h header file. While at it, also add kernel doc header to document the API. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1de9c13..e8eeebd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2279,6 +2279,26 @@ static inline void netdev_reset_queue(struct net_device *dev_queue) } /** + * netdev_cap_txqueue - check if selected tx queue exceeds device queues + * @dev: network device + * @queue_index: given tx queue index + * + * Returns 0 if given tx queue index >= number of device tx queues, + * otherwise returns the originally passed tx queue index. + */ +static inline u16 netdev_cap_txqueue(struct net_device *dev, u16 queue_index) +{ + if (unlikely(queue_index >= dev->real_num_tx_queues)) { + net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n", + dev->name, queue_index, + dev->real_num_tx_queues); + return 0; + } + + return queue_index; +} + +/** * netif_running - test if up * @dev: network device * diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 75fe83f..e29e810 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -323,17 +323,6 @@ u32 __skb_get_poff(const struct sk_buff *skb) return poff; } -static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) -{ - if (unlikely(queue_index >= dev->real_num_tx_queues)) { - net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n", - dev->name, queue_index, - dev->real_num_tx_queues); - return 0; - } - return queue_index; -} - static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_XPS @@ -408,7 +397,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, queue_index = __netdev_pick_tx(dev, skb); if (!accel_priv) - queue_index = dev_cap_txqueue(dev, queue_index); + queue_index = netdev_cap_txqueue(dev, queue_index); } skb_set_queue_mapping(skb, queue_index); -- cgit v0.10.2 From 0fd5d57ba3456c4d0b77d1ae64be4818b47d7545 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 16 Feb 2014 15:55:22 +0100 Subject: packet: check for ndo_select_queue during queue selection Mathias reported that on an AMD Geode LX embedded board (ALiX) with ath9k driver PACKET_QDISC_BYPASS, introduced in commit d346a3fae3ff ("packet: introduce PACKET_QDISC_BYPASS socket option"), triggers a WARN_ON() coming from the driver itself via 066dae93bdf ("ath9k: rework tx queue selection and fix queue stopping/waking"). The reason why this happened is that ndo_select_queue() call is not invoked from direct xmit path i.e. for ieee80211 subsystem that sets queue and TID (similar to 802.1d tag) which is being put into the frame through 802.11e (WMM, QoS). If that is not set, pending frame counter for e.g. ath9k can get messed up. So the WARN_ON() in ath9k is absolutely legitimate. Generally, the hw queue selection in ieee80211 depends on the type of traffic, and priorities are set according to ieee80211_ac_numbers mapping; working in a similar way as DiffServ only on a lower layer, so that the AP can favour frames that have "real-time" requirements like voice or video data frames. Therefore, check for presence of ndo_select_queue() in netdev ops and, if available, invoke it with a fallback handler to __packet_pick_tx_queue(), so that driver such as bnx2x, ixgbe, or mlx4 can still select a hw queue for transmission in relation to the current CPU while e.g. ieee80211 subsystem can make their own choices. Reported-by: Mathias Kretschmer Signed-off-by: Daniel Borkmann Cc: Jesper Dangaard Brouer Signed-off-by: David S. Miller diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6a2bb37..b5dc116 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -308,11 +308,27 @@ static bool packet_use_direct_xmit(const struct packet_sock *po) return po->xmit == packet_direct_xmit; } -static u16 packet_pick_tx_queue(struct net_device *dev) +static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) { return (u16) raw_smp_processor_id() % dev->real_num_tx_queues; } +static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) +{ + const struct net_device_ops *ops = dev->netdev_ops; + u16 queue_index; + + if (ops->ndo_select_queue) { + queue_index = ops->ndo_select_queue(dev, skb, NULL, + __packet_pick_tx_queue); + queue_index = netdev_cap_txqueue(dev, queue_index); + } else { + queue_index = __packet_pick_tx_queue(dev, skb); + } + + skb_set_queue_mapping(skb, queue_index); +} + /* register_prot_hook must be invoked with the po->bind_lock held, * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). @@ -2285,7 +2301,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } } - skb_set_queue_mapping(skb, packet_pick_tx_queue(dev)); + packet_pick_tx_queue(dev, skb); + skb->destructor = tpacket_destruct_skb; __packet_set_status(po, ph, TP_STATUS_SENDING); packet_inc_pending(&po->tx_ring); @@ -2499,7 +2516,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_set_queue_mapping(skb, packet_pick_tx_queue(dev)); + + packet_pick_tx_queue(dev, skb); if (po->has_vnet_hdr) { if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { -- cgit v0.10.2 From 930cd6e46eadce8b8ed2a232ee536e5fd286c152 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 21 Jan 2014 11:22:05 +0100 Subject: batman-adv: fix soft-interface MTU computation The current MTU computation always returns a value smaller than 1500bytes even if the real interfaces have an MTU large enough to compensate the batman-adv overhead. Fix the computation by properly returning the highest admitted value. Introduced by a19d3d85e1b854e4a483a55d740a42458085560d ("batman-adv: limit local translation table max size") Reported-by: Russell Senior Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 3d417d3..b851cc5 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -241,7 +241,7 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); const struct batadv_hard_iface *hard_iface; - int min_mtu = ETH_DATA_LEN; + int min_mtu = INT_MAX; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { @@ -256,8 +256,6 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface) } rcu_read_unlock(); - atomic_set(&bat_priv->packet_size_max, min_mtu); - if (atomic_read(&bat_priv->fragmentation) == 0) goto out; @@ -268,13 +266,21 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface) min_mtu = min_t(int, min_mtu, BATADV_FRAG_MAX_FRAG_SIZE); min_mtu -= sizeof(struct batadv_frag_packet); min_mtu *= BATADV_FRAG_MAX_FRAGMENTS; - atomic_set(&bat_priv->packet_size_max, min_mtu); - - /* with fragmentation enabled we can fragment external packets easily */ - min_mtu = min_t(int, min_mtu, ETH_DATA_LEN); out: - return min_mtu - batadv_max_header_len(); + /* report to the other components the maximum amount of bytes that + * batman-adv can send over the wire (without considering the payload + * overhead). For example, this value is used by TT to compute the + * maximum local table table size + */ + atomic_set(&bat_priv->packet_size_max, min_mtu); + + /* the real soft-interface MTU is computed by removing the payload + * overhead from the maximum amount of bytes that was just computed. + * + * However batman-adv does not support MTUs bigger than ETH_DATA_LEN + */ + return min_t(int, min_mtu - batadv_max_header_len(), ETH_DATA_LEN); } /* adjusts the MTU if a new interface with a smaller MTU appeared. */ -- cgit v0.10.2 From e889241f45f9cecbc84a6ffed577083ab52e62ee Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 27 Jan 2014 12:23:28 +0100 Subject: batman-adv: fix TT-TVLV parsing on OGM reception When accessing a TT-TVLV container in the OGM RX path the variable pointing to the list of changes to apply is altered by mistake. This makes the TT component read data at the wrong position in the OGM packet buffer. Fix it by removing the bogus pointer alteration. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index b6071f6..beba13f 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -3218,7 +3218,6 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv, spin_lock_bh(&orig_node->tt_lock); - tt_change = (struct batadv_tvlv_tt_change *)tt_buff; batadv_tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn, tt_change); -- cgit v0.10.2 From 91c2b1a9f680ff105369d49abc7e19ca7efb33e1 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 28 Jan 2014 02:06:47 +0100 Subject: batman-adv: release vlan object after checking the CRC There is a refcounter unbalance in the CRC checking routine invoked on OGM reception. A vlan object is retrieved (thus its refcounter is increased by one) but it is never properly released. This leads to a memleak because the vlan object will never be free'd. Fix this by releasing the vlan object after having read the CRC. Reported-by: Russell Senior Reported-by: Daniel Reported-by: cmsv Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index beba13f..c21c557 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -2262,6 +2262,7 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, { struct batadv_tvlv_tt_vlan_data *tt_vlan_tmp; struct batadv_orig_node_vlan *vlan; + uint32_t crc; int i; /* check if each received CRC matches the locally stored one */ @@ -2281,7 +2282,10 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, if (!vlan) return false; - if (vlan->tt.crc != ntohl(tt_vlan_tmp->crc)) + crc = vlan->tt.crc; + batadv_orig_node_vlan_free_ref(vlan); + + if (crc != ntohl(tt_vlan_tmp->crc)) return false; } -- cgit v0.10.2 From f1791425cf0bcda43ab9a9a37df1ad3ccb1f6654 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 30 Jan 2014 00:12:24 +0100 Subject: batman-adv: properly check pskb_may_pull return value pskb_may_pull() returns 1 on success and 0 in case of failure, therefore checking for the return value being negative does not make sense at all. This way if the function fails we will probably read beyond the current skb data buffer. Fix this by doing the proper check. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 1ed9f7c..c26f073 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -688,7 +688,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, int is_old_ttvn; /* check if there is enough data before accessing it */ - if (pskb_may_pull(skb, hdr_len + ETH_HLEN) < 0) + if (!pskb_may_pull(skb, hdr_len + ETH_HLEN)) return 0; /* create a copy of the skb (in case of for re-routing) to modify it. */ -- cgit v0.10.2 From 08bf0ed29c7ded45c477d08618220dd200c3524a Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Wed, 29 Jan 2014 11:25:12 +0100 Subject: batman-adv: avoid potential race condition when adding a new neighbour When adding a new neighbour it is important to atomically perform the following: - check if the neighbour already exists - append the neighbour to the proper list If the two operations are not performed in an atomic context it is possible that two concurrent insertions add the same neighbour twice. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 512159b..094ae7c 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -266,7 +266,7 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, struct batadv_orig_node *orig_neigh) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - struct batadv_neigh_node *neigh_node; + struct batadv_neigh_node *neigh_node, *tmp_neigh_node; neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, orig_node); if (!neigh_node) @@ -281,14 +281,24 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, neigh_node->orig_node = orig_neigh; neigh_node->if_incoming = hard_iface; - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Creating new neighbor %pM for orig_node %pM on interface %s\n", - neigh_addr, orig_node->orig, hard_iface->net_dev->name); - spin_lock_bh(&orig_node->neigh_list_lock); - hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); + tmp_neigh_node = batadv_neigh_node_get(orig_node, hard_iface, + neigh_addr); + if (!tmp_neigh_node) { + hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); + } else { + kfree(neigh_node); + batadv_hardif_free_ref(hard_iface); + neigh_node = tmp_neigh_node; + } spin_unlock_bh(&orig_node->neigh_list_lock); + if (!tmp_neigh_node) + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Creating new neighbor %pM for orig_node %pM on interface %s\n", + neigh_addr, orig_node->orig, + hard_iface->net_dev->name); + out: return neigh_node; } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 6df12a2..8539416 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -458,6 +458,42 @@ out: } /** + * batadv_neigh_node_get - retrieve a neighbour from the list + * @orig_node: originator which the neighbour belongs to + * @hard_iface: the interface where this neighbour is connected to + * @addr: the address of the neighbour + * + * Looks for and possibly returns a neighbour belonging to this originator list + * which is connected through the provided hard interface. + * Returns NULL if the neighbour is not found. + */ +struct batadv_neigh_node * +batadv_neigh_node_get(const struct batadv_orig_node *orig_node, + const struct batadv_hard_iface *hard_iface, + const uint8_t *addr) +{ + struct batadv_neigh_node *tmp_neigh_node, *res = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { + if (!batadv_compare_eth(tmp_neigh_node->addr, addr)) + continue; + + if (tmp_neigh_node->if_incoming != hard_iface) + continue; + + if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + continue; + + res = tmp_neigh_node; + break; + } + rcu_read_unlock(); + + return res; +} + +/** * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object * @rcu: rcu pointer of the orig_ifinfo object */ diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 37be290..db3a9ed 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -29,6 +29,10 @@ void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node); struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_neigh_node * +batadv_neigh_node_get(const struct batadv_orig_node *orig_node, + const struct batadv_hard_iface *hard_iface, + const uint8_t *addr); +struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, const uint8_t *neigh_addr, struct batadv_orig_node *orig_node); -- cgit v0.10.2 From b2262df7fcf2c395eca564df83238e931d88d7bf Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Sat, 8 Feb 2014 16:45:06 +0100 Subject: batman-adv: fix potential orig_node reference leak Since batadv_orig_node_new() sets the refcount to two, assuming that the calling function will use a reference for putting the orig_node into a hash or similar, both references must be freed if initialization of the orig_node fails. Otherwise that object may be leaked in that error case. Reported-by: Antonio Quartulli Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 094ae7c..42cbc0a 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -254,6 +254,8 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const uint8_t *addr) free_bcast_own: kfree(orig_node->bat_iv.bcast_own); free_orig_node: + /* free twice, as batadv_orig_node_new sets refcount to 2 */ + batadv_orig_node_free_ref(orig_node); batadv_orig_node_free_ref(orig_node); return NULL; -- cgit v0.10.2 From a30e22ca8464c2dc573e0144a972221c2f06c2cd Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 11 Feb 2014 17:05:06 +0100 Subject: batman-adv: fix TT CRC computation by ensuring byte order When computing the CRC on a 2byte variable the order of the bytes obviously alters the final result. This means that computing the CRC over the same value on two archs having different endianess leads to different numbers. The global and local translation table CRC computation routine makes this mistake while processing the clients VIDs. The result is a continuous CRC mismatching between nodes having different endianess. Fix this by converting the VID to Network Order before processing it. This guarantees that every node uses the same byte order. Introduced by 7ea7b4a142758deaf46c1af0ca9ceca6dd55138b ("batman-adv: make the TT CRC logic VLAN specific") Reported-by: Russel Senior Signed-off-by: Antonio Quartulli Tested-by: Russell Senior Signed-off-by: Marek Lindner diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index c21c557..959dde7 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1975,6 +1975,7 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv, struct hlist_head *head; uint32_t i, crc_tmp, crc = 0; uint8_t flags; + __be16 tmp_vid; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -2011,8 +2012,11 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv, orig_node)) continue; - crc_tmp = crc32c(0, &tt_common->vid, - sizeof(tt_common->vid)); + /* use network order to read the VID: this ensures that + * every node reads the bytes in the same order. + */ + tmp_vid = htons(tt_common->vid); + crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid)); /* compute the CRC on flags that have to be kept in sync * among nodes @@ -2046,6 +2050,7 @@ static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv, struct hlist_head *head; uint32_t i, crc_tmp, crc = 0; uint8_t flags; + __be16 tmp_vid; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -2064,8 +2069,11 @@ static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv, if (tt_common->flags & BATADV_TT_CLIENT_NEW) continue; - crc_tmp = crc32c(0, &tt_common->vid, - sizeof(tt_common->vid)); + /* use network order to read the VID: this ensures that + * every node reads the bytes in the same order. + */ + tmp_vid = htons(tt_common->vid); + crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid)); /* compute the CRC on flags that have to be kept in sync * among nodes -- cgit v0.10.2 From 05c3c8a636aa9ee35ce13f65afc5b665615cc786 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 11 Feb 2014 17:05:07 +0100 Subject: batman-adv: free skb on TVLV parsing success When the TVLV parsing routine succeed the skb is left untouched thus leading to a memory leak. Fix this by consuming the skb in case of success. Introduced by ef26157747d42254453f6b3ac2bd8bd3c53339c3 ("batman-adv: tvlv - basic infrastructure") Reported-by: Russel Senior Signed-off-by: Antonio Quartulli Tested-by: Russell Senior Signed-off-by: Marek Lindner diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index c26f073..a953d5b 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -918,6 +918,8 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb, if (ret != NET_RX_SUCCESS) ret = batadv_route_unicast_packet(skb, recv_if); + else + consume_skb(skb); return ret; } -- cgit v0.10.2 From a5a5cb8cab526af2f6cbe9715f8ca843192f0d81 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sat, 15 Feb 2014 02:17:20 +0100 Subject: batman-adv: avoid double free when orig_node initialization fails In the failure path of the orig_node initialization routine the orig_node->bat_iv.bcast_own field is free'd twice: first in batadv_iv_ogm_orig_get() and then later in batadv_orig_node_free_rcu(). Fix it by removing the kfree in batadv_iv_ogm_orig_get(). Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 42cbc0a..8323bce 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -241,18 +241,16 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const uint8_t *addr) size = bat_priv->num_ifaces * sizeof(uint8_t); orig_node->bat_iv.bcast_own_sum = kzalloc(size, GFP_ATOMIC); if (!orig_node->bat_iv.bcast_own_sum) - goto free_bcast_own; + goto free_orig_node; hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig, batadv_choose_orig, orig_node, &orig_node->hash_entry); if (hash_added != 0) - goto free_bcast_own; + goto free_orig_node; return orig_node; -free_bcast_own: - kfree(orig_node->bat_iv.bcast_own); free_orig_node: /* free twice, as batadv_orig_node_new sets refcount to 2 */ batadv_orig_node_free_ref(orig_node); -- cgit v0.10.2 From 70b271a78beba787155d6696aacd7c4d4a251c50 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sat, 15 Feb 2014 21:50:37 +0100 Subject: batman-adv: fix potential kernel paging error for unicast transmissions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit batadv_send_skb_prepare_unicast(_4addr) might reallocate the skb's data. If it does then our ethhdr pointer is not valid anymore in batadv_send_skb_unicast(), resulting in a kernel paging error. Fixing this by refetching the ethhdr pointer after the potential reallocation. Signed-off-by: Linus Lüssing Signed-off-by: Antonio Quartulli diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 579f5f0..843febd 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -254,9 +254,9 @@ static int batadv_send_skb_unicast(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, unsigned short vid) { - struct ethhdr *ethhdr = (struct ethhdr *)skb->data; + struct ethhdr *ethhdr; struct batadv_unicast_packet *unicast_packet; - int ret = NET_XMIT_DROP; + int ret = NET_XMIT_DROP, hdr_size; if (!orig_node) goto out; @@ -265,12 +265,16 @@ static int batadv_send_skb_unicast(struct batadv_priv *bat_priv, case BATADV_UNICAST: if (!batadv_send_skb_prepare_unicast(skb, orig_node)) goto out; + + hdr_size = sizeof(*unicast_packet); break; case BATADV_UNICAST_4ADDR: if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, skb, orig_node, packet_subtype)) goto out; + + hdr_size = sizeof(struct batadv_unicast_4addr_packet); break; default: /* this function supports UNICAST and UNICAST_4ADDR only. It @@ -279,6 +283,7 @@ static int batadv_send_skb_unicast(struct batadv_priv *bat_priv, goto out; } + ethhdr = (struct ethhdr *)(skb->data + hdr_size); unicast_packet = (struct batadv_unicast_packet *)skb->data; /* inform the destination node that we are still missing a correct route -- cgit v0.10.2 From 08b44656c08c8c2f73cdac2a058be2880e3361f2 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 17 Feb 2014 14:22:21 +0100 Subject: gre: add link local route when local addr is any This bug was reported by Steinar H. Gunderson and was introduced by commit f7cb8886335d ("sit/gre6: don't try to add the same route two times"). root@morgental:~# ip tunnel add foo mode gre remote 1.2.3.4 ttl 64 root@morgental:~# ip link set foo up mtu 1468 root@morgental:~# ip -6 route show dev foo fe80::/64 proto kernel metric 256 but after the above commit, no such route shows up. There is no link local route because dev->dev_addr is 0 (because local ipv4 address is 0), hence no link local address is configured. In this scenario, the link local address is added manually: 'ip -6 addr add fe80::1 dev foo' and because prefix is /128, no link local route is added by the kernel. Even if the right things to do is to add the link local address with a /64 prefix, we need to restore the previous behavior to avoid breaking userpace. Reported-by: Steinar H. Gunderson Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ad23569..fdbfeca 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2783,6 +2783,8 @@ static void addrconf_gre_config(struct net_device *dev) ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) addrconf_add_linklocal(idev, &addr); + else + addrconf_prefix_route(&addr, 64, dev, 0, 0); } #endif -- cgit v0.10.2 From eb85569fe2d06c2fbf4de7b66c263ca095b397aa Mon Sep 17 00:00:00 2001 From: Emil Goode Date: Thu, 13 Feb 2014 17:50:19 +0100 Subject: usbnet: remove generic hard_header_len check This patch removes a generic hard_header_len check from the usbnet module that is causing dropped packages under certain circumstances for devices that send rx packets that cross urb boundaries. One example is the AX88772B which occasionally send rx packets that cross urb boundaries where the remaining partial packet is sent with no hardware header. When the buffer with a partial packet is of less number of octets than the value of hard_header_len the buffer is discarded by the usbnet module. With AX88772B this can be reproduced by using ping with a packet size between 1965-1976. The bug has been reported here: https://bugzilla.kernel.org/show_bug.cgi?id=29082 This patch introduces the following changes: - Removes the generic hard_header_len check in the rx_complete function in the usbnet module. - Introduces a ETH_HLEN check for skbs that are not cloned from within a rx_fixup callback. - For safety a hard_header_len check is added to each rx_fixup callback function that could be affected by this change. These extra checks could possibly be removed by someone who has the hardware to test. - Removes a call to dev_kfree_skb_any() and instead utilizes the dev->done list to queue skbs for cleanup. The changes place full responsibility on the rx_fixup callback functions that clone skbs to only pass valid skbs to the usbnet_skb_return function. Signed-off-by: Emil Goode Reported-by: Igor Gnatenko Signed-off-by: David S. Miller diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index d6f64da..955df81 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1118,6 +1118,10 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) u16 hdr_off; u32 *pkt_hdr; + /* This check is no longer done by usbnet */ + if (skb->len < dev->net->hard_header_len) + return 0; + skb_trim(skb, skb->len - 4); memcpy(&rx_hdr, skb_tail_pointer(skb), 4); le32_to_cpus(&rx_hdr); diff --git a/drivers/net/usb/gl620a.c b/drivers/net/usb/gl620a.c index e4a8a93..1cc24e6 100644 --- a/drivers/net/usb/gl620a.c +++ b/drivers/net/usb/gl620a.c @@ -84,6 +84,10 @@ static int genelink_rx_fixup(struct usbnet *dev, struct sk_buff *skb) u32 size; u32 count; + /* This check is no longer done by usbnet */ + if (skb->len < dev->net->hard_header_len) + return 0; + header = (struct gl_header *) skb->data; // get the packet count of the received skb diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c index a305a7b..82d844a 100644 --- a/drivers/net/usb/mcs7830.c +++ b/drivers/net/usb/mcs7830.c @@ -526,8 +526,9 @@ static int mcs7830_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { u8 status; - if (skb->len == 0) { - dev_err(&dev->udev->dev, "unexpected empty rx frame\n"); + /* This check is no longer done by usbnet */ + if (skb->len < dev->net->hard_header_len) { + dev_err(&dev->udev->dev, "unexpected tiny rx frame\n"); return 0; } diff --git a/drivers/net/usb/net1080.c b/drivers/net/usb/net1080.c index 0a85d92..4cbdb13 100644 --- a/drivers/net/usb/net1080.c +++ b/drivers/net/usb/net1080.c @@ -364,6 +364,10 @@ static int net1080_rx_fixup(struct usbnet *dev, struct sk_buff *skb) struct nc_trailer *trailer; u16 hdr_len, packet_len; + /* This check is no longer done by usbnet */ + if (skb->len < dev->net->hard_header_len) + return 0; + if (!(skb->len & 0x01)) { netdev_dbg(dev->net, "rx framesize %d range %d..%d mtu %d\n", skb->len, dev->net->hard_header_len, dev->hard_mtu, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 1eddd43..313cb6c 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -80,10 +80,10 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { __be16 proto; - /* usbnet rx_complete guarantees that skb->len is at least - * hard_header_len, so we can inspect the dest address without - * checking skb->len - */ + /* This check is no longer done by usbnet */ + if (skb->len < dev->net->hard_header_len) + return 0; + switch (skb->data[0] & 0xf0) { case 0x40: proto = htons(ETH_P_IP); diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index a48bc0f..524a47a281 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -492,6 +492,10 @@ EXPORT_SYMBOL_GPL(rndis_unbind); */ int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { + /* This check is no longer done by usbnet */ + if (skb->len < dev->net->hard_header_len) + return 0; + /* peripheral may have batched packets to us... */ while (likely(skb->len)) { struct rndis_data_hdr *hdr = (void *)skb->data; diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index f17b9e0..d9e7892 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -2106,6 +2106,10 @@ static void smsc75xx_rx_csum_offload(struct usbnet *dev, struct sk_buff *skb, static int smsc75xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { + /* This check is no longer done by usbnet */ + if (skb->len < dev->net->hard_header_len) + return 0; + while (skb->len > 0) { u32 rx_cmd_a, rx_cmd_b, align_count, size; struct sk_buff *ax_skb; diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 8dd54a0..424db65e 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1723,6 +1723,10 @@ static void smsc95xx_rx_csum_offload(struct sk_buff *skb) static int smsc95xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { + /* This check is no longer done by usbnet */ + if (skb->len < dev->net->hard_header_len) + return 0; + while (skb->len > 0) { u32 header, align_count; struct sk_buff *ax_skb; diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index 8017108..b94a0fb 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -63,6 +63,10 @@ static int sr_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { int offset = 0; + /* This check is no longer done by usbnet */ + if (skb->len < dev->net->hard_header_len) + return 0; + while (offset + sizeof(u32) < skb->len) { struct sk_buff *sr_skb; u16 size; diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 4671da7..dd10d58 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -542,17 +542,19 @@ static inline void rx_process (struct usbnet *dev, struct sk_buff *skb) } // else network stack removes extra byte if we forced a short packet - if (skb->len) { - /* all data was already cloned from skb inside the driver */ - if (dev->driver_info->flags & FLAG_MULTI_PACKET) - dev_kfree_skb_any(skb); - else - usbnet_skb_return(dev, skb); + /* all data was already cloned from skb inside the driver */ + if (dev->driver_info->flags & FLAG_MULTI_PACKET) + goto done; + + if (skb->len < ETH_HLEN) { + dev->net->stats.rx_errors++; + dev->net->stats.rx_length_errors++; + netif_dbg(dev, rx_err, dev->net, "rx length %d\n", skb->len); + } else { + usbnet_skb_return(dev, skb); return; } - netif_dbg(dev, rx_err, dev->net, "drop\n"); - dev->net->stats.rx_errors++; done: skb_queue_tail(&dev->done, skb); } @@ -574,13 +576,6 @@ static void rx_complete (struct urb *urb) switch (urb_status) { /* success */ case 0: - if (skb->len < dev->net->hard_header_len) { - state = rx_cleanup; - dev->net->stats.rx_errors++; - dev->net->stats.rx_length_errors++; - netif_dbg(dev, rx_err, dev->net, - "rx length %d\n", skb->len); - } break; /* stalls need manual reset. this is rare ... except that -- cgit v0.10.2 From 163c8ff30dbe473abfbb24a7eac5536c87f3baa9 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Fri, 14 Feb 2014 18:13:50 +0100 Subject: bonding: 802.3ad: make aggregator_identifier bond-private aggregator_identifier is used to assign unique aggregator identifiers to aggregators of a bond during device enslaving. aggregator_identifier is currently a global variable that is zeroed in bond_3ad_initialize(). This sequence will lead to duplicate aggregator identifiers for eth1 and eth3: create bond0 change bond0 mode to 802.3ad enslave eth0 to bond0 //eth0 gets agg id 1 enslave eth1 to bond0 //eth1 gets agg id 2 create bond1 change bond1 mode to 802.3ad enslave eth2 to bond1 //aggregator_identifier is reset to 0 //eth2 gets agg id 1 enslave eth3 to bond0 //eth3 gets agg id 2 Fix this by making aggregator_identifier private to the bond. Signed-off-by: Jiri Bohac Acked-by: Veaceslav Falico Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index cce1f1b..6d20fbd 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1796,8 +1796,6 @@ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout) BOND_AD_INFO(bond).agg_select_timer = timeout; } -static u16 aggregator_identifier; - /** * bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures * @bond: bonding struct to work on @@ -1811,7 +1809,7 @@ void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution) if (!MAC_ADDRESS_EQUAL(&(BOND_AD_INFO(bond).system.sys_mac_addr), bond->dev->dev_addr)) { - aggregator_identifier = 0; + BOND_AD_INFO(bond).aggregator_identifier = 0; BOND_AD_INFO(bond).system.sys_priority = 0xFFFF; BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); @@ -1880,7 +1878,7 @@ void bond_3ad_bind_slave(struct slave *slave) ad_initialize_agg(aggregator); aggregator->aggregator_mac_address = *((struct mac_addr *)bond->dev->dev_addr); - aggregator->aggregator_identifier = (++aggregator_identifier); + aggregator->aggregator_identifier = ++BOND_AD_INFO(bond).aggregator_identifier; aggregator->slave = slave; aggregator->is_active = 0; aggregator->num_of_ports = 0; diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h index 13dc9d3..f4dd959 100644 --- a/drivers/net/bonding/bond_3ad.h +++ b/drivers/net/bonding/bond_3ad.h @@ -253,6 +253,7 @@ struct ad_system { struct ad_bond_info { struct ad_system system; /* 802.3ad system structure */ u32 agg_select_timer; // Timer to select aggregator after all adapter's hand shakes + u16 aggregator_identifier; }; struct ad_slave_info { -- cgit v0.10.2 From 3eca5299532ea9d6ab63b03e7aaac9071d5eed29 Mon Sep 17 00:00:00 2001 From: Tommie Gannert Date: Mon, 17 Feb 2014 20:46:04 +0000 Subject: irtty-sir.c: Do not set_termios() on irtty_close() Issuing set_termios() from irtty_close() causes kernel Oops for unplugged usb-serial devices. Since no other tty_ldisc calls set_termios() on close and no tty driver seem to check if tty->device_data is NULL or not on entry to set_termios(), the only solution I can come up with is to remove the irtty_stop_receiver() call, which only updates termios. Signed-off-by: Tommie Gannert Signed-off-by: David S. Miller diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index 177441a..24b6ddd 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -522,7 +522,6 @@ static void irtty_close(struct tty_struct *tty) sirdev_put_instance(priv->dev); /* Stop tty */ - irtty_stop_receiver(tty, TRUE); clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); if (tty->ops->stop) tty->ops->stop(tty); -- cgit v0.10.2 From a6254864c08109c66a194612585afc0439005286 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Mon, 17 Feb 2014 15:23:43 +0800 Subject: ipv4: fix counter in_slow_tot since commit 89aef8921bf("ipv4: Delete routing cache."), the counter in_slow_tot can't work correctly. The counter in_slow_tot increase by one when fib_lookup() return successfully in ip_route_input_slow(), but actually the dst struct maybe not be created and cached, so we can increase in_slow_tot after the dst struct is created. Signed-off-by: Duan Jiong Signed-off-by: David S. Miller diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6f6dd85..4c011ec 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1597,6 +1597,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_gateway = 0; rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); + RT_CACHE_STAT_INC(in_slow_tot); rth->dst.input = ip_forward; rth->dst.output = ip_output; @@ -1701,8 +1702,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto no_route; } - RT_CACHE_STAT_INC(in_slow_tot); - if (res.type == RTN_BROADCAST) goto brd_input; @@ -1773,6 +1772,7 @@ local_input: rth->rt_gateway = 0; rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); + RT_CACHE_STAT_INC(in_slow_tot); if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; -- cgit v0.10.2 From ffd5939381c609056b33b7585fb05a77b4c695f3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 17 Feb 2014 12:11:11 +0100 Subject: net: sctp: fix sctp_connectx abi for ia32 emulation/compat mode SCTP's sctp_connectx() abi breaks for 64bit kernels compiled with 32bit emulation (e.g. ia32 emulation or x86_x32). Due to internal usage of 'struct sctp_getaddrs_old' which includes a struct sockaddr pointer, sizeof(param) check will always fail in kernel as the structure in 64bit kernel space is 4bytes larger than for user binaries compiled in 32bit mode. Thus, applications making use of sctp_connectx() won't be able to run under such circumstances. Introduce a compat interface in the kernel to deal with such situations by using a 'struct compat_sctp_getaddrs_old' structure where user data is copied into it, and then sucessively transformed into a 'struct sctp_getaddrs_old' structure with the help of compat_ptr(). That fixes sctp_connectx() abi without any changes needed in user space, and lets the SCTP test suite pass when compiled in 32bit and run on 64bit kernels. Fixes: f9c67811ebc0 ("sctp: Fix regression introduced by new sctp_connectx api") Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7075ac8..981aaf8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include @@ -1368,11 +1369,19 @@ static int sctp_setsockopt_connectx(struct sock *sk, /* * New (hopefully final) interface for the API. * We use the sctp_getaddrs_old structure so that use-space library - * can avoid any unnecessary allocations. The only defferent part + * can avoid any unnecessary allocations. The only different part * is that we store the actual length of the address buffer into the - * addrs_num structure member. That way we can re-use the existing + * addrs_num structure member. That way we can re-use the existing * code. */ +#ifdef CONFIG_COMPAT +struct compat_sctp_getaddrs_old { + sctp_assoc_t assoc_id; + s32 addr_num; + compat_uptr_t addrs; /* struct sockaddr * */ +}; +#endif + static int sctp_getsockopt_connectx3(struct sock *sk, int len, char __user *optval, int __user *optlen) @@ -1381,16 +1390,30 @@ static int sctp_getsockopt_connectx3(struct sock *sk, int len, sctp_assoc_t assoc_id = 0; int err = 0; - if (len < sizeof(param)) - return -EINVAL; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + struct compat_sctp_getaddrs_old param32; - if (copy_from_user(¶m, optval, sizeof(param))) - return -EFAULT; + if (len < sizeof(param32)) + return -EINVAL; + if (copy_from_user(¶m32, optval, sizeof(param32))) + return -EFAULT; - err = __sctp_setsockopt_connectx(sk, - (struct sockaddr __user *)param.addrs, - param.addr_num, &assoc_id); + param.assoc_id = param32.assoc_id; + param.addr_num = param32.addr_num; + param.addrs = compat_ptr(param32.addrs); + } else +#endif + { + if (len < sizeof(param)) + return -EINVAL; + if (copy_from_user(¶m, optval, sizeof(param))) + return -EFAULT; + } + err = __sctp_setsockopt_connectx(sk, (struct sockaddr __user *) + param.addrs, param.addr_num, + &assoc_id); if (err == 0 || err == -EINPROGRESS) { if (copy_to_user(optval, &assoc_id, sizeof(assoc_id))) return -EFAULT; -- cgit v0.10.2 From d7cf0c34af067555737193b6c1aa7abaa677f29c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 18 Feb 2014 15:20:51 +0300 Subject: af_packet: remove a stray tab in packet_set_ring() At first glance it looks like there is a missing curly brace but actually the code works the same either way. I have adjusted the indenting but left the code the same. Signed-off-by: Dan Carpenter Acked-by: Daniel Borkmann Signed-off-by: David S. Miller diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b5dc116..48a6a93 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3804,7 +3804,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, */ if (!tx_ring) init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring); - break; + break; default: break; } -- cgit v0.10.2 From ce5eaf023a231ebd313ecc7e0b33278513d8ad80 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 18 Feb 2014 12:55:42 +0000 Subject: NET: fec: only enable napi if we are successful If napi is left enabled after a failed attempt to bring the interface up, we BUG: fec 2188000.ethernet eth0: no PHY, assuming direct connection to switch libphy: PHY fixed-0:00 not found fec 2188000.ethernet eth0: could not attach to PHY ------------[ cut here ]------------ kernel BUG at include/linux/netdevice.h:502! Internal error: Oops - BUG: 0 [#1] SMP ARM ... PC is at fec_enet_open+0x4d0/0x500 LR is at __dev_open+0xa4/0xfc Only enable napi after we are past all the failure paths. Signed-off-by: Russell King Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index d4782b4..903362a 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1778,8 +1778,6 @@ fec_enet_open(struct net_device *ndev) struct fec_enet_private *fep = netdev_priv(ndev); int ret; - napi_enable(&fep->napi); - /* I should reset the ring buffers here, but I don't yet know * a simple way to do that. */ @@ -1794,6 +1792,8 @@ fec_enet_open(struct net_device *ndev) fec_enet_free_buffers(ndev); return ret; } + + napi_enable(&fep->napi); phy_start(fep->phy_dev); netif_start_queue(ndev); fep->opened = 1; -- cgit v0.10.2 From 4b636b535d55f89077691e737930a615422113d6 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 18 Feb 2014 14:18:11 +0100 Subject: net: ethernet: update dependency and help text of mvneta With the introduction of the support for Armada 375 and Armada 38x, the hidden Kconfig option MACH_ARMADA_370_XP is being renamed to MACH_MVEBU_V7. Therefore, the dependency that was used for the mvneta driver can no longer work. This commit replaces this dependency by a dependency on PLAT_ORION, which is used similarly for the mv643xx_eth driver. In addition to this, it takes this opportunity to adjust the description and help text to indicate that the driver can is also used for Armada 38x. Note that Armada 375 cannot use this driver as it has a completely different networking unit, which will require a separate driver. Signed-off-by: Thomas Petazzoni Acked-by: Jason Cooper Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index 6300fd2..68e6a66 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -43,12 +43,12 @@ config MVMDIO This driver is used by the MV643XX_ETH and MVNETA drivers. config MVNETA - tristate "Marvell Armada 370/XP network interface support" - depends on MACH_ARMADA_370_XP + tristate "Marvell Armada 370/38x/XP network interface support" + depends on PLAT_ORION select MVMDIO ---help--- This driver supports the network interface units in the - Marvell ARMADA XP and ARMADA 370 SoC family. + Marvell ARMADA XP, ARMADA 370 and ARMADA 38x SoC family. Note that this driver is distinct from the mv643xx_eth driver, which should be used for the older Marvell SoCs -- cgit v0.10.2 From f15c586d1d9d99b04712d2b2eeee31ab29db77ff Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 18 Feb 2014 12:16:58 +0000 Subject: of_mdio: fix phy interrupt passing The of_mdiobus_register_phy() is not setting phy->irq thus causing some drivers to incorrectly assume that the PHY does not have an IRQ associated with it. Not only do some drivers report no IRQ they do not install an interrupt handler for the PHY. Simplify the code setting irq and set the phy->irq at the same time so that we cover the following issues, which should cover all the cases the code will find: - Set phy->irq if node has irq property and mdio->irq is NULL - Set phy->irq if node has no irq and mdio->irq is not NULL - Leave phy->irq as PHY_POLL default if none of the above This fixes the issue: net eth0: attached PHY 1 (IRQ -1) to driver Micrel KSZ8041RNLI to the correct: net eth0: attached PHY 1 (IRQ 416) to driver Micrel KSZ8041RNLI Signed-off-by: Ben Dooks Signed-off-by: David S. Miller diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 495facc..5b3c24f 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -48,7 +48,7 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio, struct device_node *chi { struct phy_device *phy; bool is_c45; - int rc, prev_irq; + int rc; u32 max_speed = 0; is_c45 = of_device_is_compatible(child, @@ -58,12 +58,14 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio, struct device_node *chi if (!phy || IS_ERR(phy)) return 1; - if (mdio->irq) { - prev_irq = mdio->irq[addr]; - mdio->irq[addr] = - irq_of_parse_and_map(child, 0); - if (!mdio->irq[addr]) - mdio->irq[addr] = prev_irq; + rc = irq_of_parse_and_map(child, 0); + if (rc > 0) { + phy->irq = rc; + if (mdio->irq) + mdio->irq[addr] = rc; + } else { + if (mdio->irq) + phy->irq = mdio->irq[addr]; } /* Associate the OF node with the device structure so it -- cgit v0.10.2 From 22f08ad9721d4d1a92061b60026144627a60d644 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 18 Feb 2014 09:47:49 -0800 Subject: MAINTAINERS: add entry for the PHY library The PHY library has been subject to some changes, new drivers and DT interactions over the past few months. Add myself as a maintainer for the core PHY library parts and drivers. Make sure the PHY library entry also covers the Device Tree files which have a close interaction with the MDIO bus, PHY connection and Ethernet PHY mode parsing. CC: Grant Likely CC: Shaohui Xie CC: Andy Fleming Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index 091b50e..c8b3975 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3324,6 +3324,17 @@ S: Maintained F: include/linux/netfilter_bridge/ F: net/bridge/ +ETHERNET PHY LIBRARY +M: Florian Fainelli +L: netdev@vger.kernel.org +S: Maintained +F: include/linux/phy.h +F: include/linux/phy_fixed.h +F: drivers/net/phy/ +F: Documentation/networking/phy.txt +F: drivers/of/of_mdio.c +F: drivers/of/of_net.c + EXT2 FILE SYSTEM M: Jan Kara L: linux-ext4@vger.kernel.org -- cgit v0.10.2