From 180032973ee97daddf5c9d733e5b425b108f8679 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Thu, 29 Aug 2013 13:26:57 +0300 Subject: cfg80211: use the correct macro to check for active monitor support Use MONITOR_FLAG_ACTIVE, which is a flag mask, instead of NL80211_MNTR_FLAG_ACTIVE, which is a flag index, when checking if the hardware supports active monitoring. Cc: stable@vger.kernel.org Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index af8d84a..626dc3b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2421,7 +2421,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) change = true; } - if (flags && (*flags & NL80211_MNTR_FLAG_ACTIVE) && + if (flags && (*flags & MONITOR_FLAG_ACTIVE) && !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) return -EOPNOTSUPP; @@ -2483,7 +2483,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); - if (!err && (flags & NL80211_MNTR_FLAG_ACTIVE) && + if (!err && (flags & MONITOR_FLAG_ACTIVE) && !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) return -EOPNOTSUPP; -- cgit v0.10.2 From f478f33a93f9353dcd1fe55445343d76b1c3f84a Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Thu, 26 Sep 2013 16:55:28 +0100 Subject: cfg80211: fix warning when using WEXT for IBSS Fix kernel warning when using WEXT for configuring ad-hoc mode, e.g. "iwconfig wlan0 essid test channel 1" WARNING: at net/wireless/chan.c:373 cfg80211_chandef_usable+0x50/0x21c [cfg80211]() The warning is caused by an uninitialized variable center_freq1. Cc: stable@vger.kernel.org Signed-off-by: Bruno Randolf Signed-off-by: Johannes Berg diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 39bff7d..403fe29 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -263,6 +263,8 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, if (chan->flags & IEEE80211_CHAN_DISABLED) continue; wdev->wext.ibss.chandef.chan = chan; + wdev->wext.ibss.chandef.center_freq1 = + chan->center_freq; break; } @@ -347,6 +349,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, if (chan) { wdev->wext.ibss.chandef.chan = chan; wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + wdev->wext.ibss.chandef.center_freq1 = freq; wdev->wext.ibss.channel_fixed = true; } else { /* cfg80211_ibss_wext_join will pick one if needed */ -- cgit v0.10.2 From 6329b8d917adc077caa60c2447385554130853a3 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 17 Sep 2013 11:15:43 +0200 Subject: mac80211: drop spoofed packets in ad-hoc mode If an Ad-Hoc node receives packets with the Cell ID or its own MAC address as source address, it hits a WARN_ON in sta_info_insert_check() With many packets, this can massively spam the logs. One way that this can easily happen is through having Cisco APs in the area with rouge AP detection and countermeasures enabled. Such Cisco APs will regularly send fake beacons, disassoc and deauth packets that trigger these warnings. To fix this issue, drop such spoofed packets early in the rx path. 
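The idea, as a minimal sketch (illustrative only, not the mac80211 change itself, which follows in the diff; the helper name is hypothetical): a frame whose transmitter address (addr2) matches either the local interface address or the cell's BSSID cannot come from a legitimate peer, so it can be rejected before any station lookup takes place.

        /* Sketch, assuming <linux/etherdevice.h> and <linux/ieee80211.h>:
         * reject IBSS frames that claim to be sent by ourselves or by the
         * Cell ID itself.  addr2 is the transmitter address.
         */
        static bool ibss_frame_is_spoofed(const u8 *own_addr, const u8 *bssid,
                                          const struct ieee80211_hdr *hdr)
        {
                return ether_addr_equal(own_addr, hdr->addr2) ||
                       ether_addr_equal(bssid, hdr->addr2);
        }
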
Cc: stable@vger.kernel.org Reported-by: Thomas Huehn Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 54395d7..674eac1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3056,6 +3056,9 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, case NL80211_IFTYPE_ADHOC: if (!bssid) return 0; + if (ether_addr_equal(sdata->vif.addr, hdr->addr2) || + ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2)) + return 0; if (ieee80211_is_beacon(hdr->frame_control)) { return 1; } else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) { -- cgit v0.10.2 From cc63ec766b7821c8dc4dc1d1e980124aea96b553 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Sat, 7 Sep 2013 23:40:44 -0700 Subject: mac80211: fix the setting of extended supported rate IE The patch "mac80211: select and adjust bitrates according to channel mode" causes regression and breaks the extended supported rate IE setting. Since "i" is starting with 8, so this is not necessary to introduce "skip" here. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Colleen Twitty Reviewed-by: Jason Abele Signed-off-by: Johannes Berg diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e1b34a1..9c3200b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2103,7 +2103,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; - int rate, skip, shift; + int rate, shift; u8 i, exrates, *pos; u32 basic_rates = sdata->vif.bss_conf.basic_rates; u32 rate_flags; @@ -2131,14 +2131,11 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, exrates + 2); *pos++ = WLAN_EID_EXT_SUPP_RATES; *pos++ = exrates; - skip = 0; for (i = 8; i < sband->n_bitrates; i++) { u8 basic = 0; if ((rate_flags & sband->bitrates[i].flags) != rate_flags) continue; - if (skip++ < 8) - continue; if (need_basic && basic_rates & BIT(i)) basic = 0x80; rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, -- cgit v0.10.2 From aa5f66d5a1df1c2b04bccdcb19711675c765d7c4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Sep 2013 20:03:45 +0200 Subject: cfg80211: fix sysfs registration race My locking rework/race fixes caused a regression in the registration, causing uevent notifications for wireless devices before the device is really fully registered and available in nl80211. Fix this by moving the device_add() under rtnl and move the rfkill to afterwards (it can't be under rtnl.) 
Reported-and-tested-by: Maxime Bizon Signed-off-by: Johannes Berg diff --git a/net/wireless/core.c b/net/wireless/core.c index 6715396..fe8d4f2 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -566,18 +566,13 @@ int wiphy_register(struct wiphy *wiphy) /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); - + rtnl_lock(); res = device_add(&rdev->wiphy.dev); - if (res) - return res; - - res = rfkill_register(rdev->rfkill); if (res) { - device_del(&rdev->wiphy.dev); + rtnl_unlock(); return res; } - rtnl_lock(); /* set up regulatory info */ wiphy_regulatory_register(wiphy); @@ -606,6 +601,15 @@ int wiphy_register(struct wiphy *wiphy) rdev->wiphy.registered = true; rtnl_unlock(); + + res = rfkill_register(rdev->rfkill); + if (res) { + rfkill_destroy(rdev->rfkill); + rdev->rfkill = NULL; + wiphy_unregister(&rdev->wiphy); + return res; + } + return 0; } EXPORT_SYMBOL(wiphy_register); @@ -640,7 +644,8 @@ void wiphy_unregister(struct wiphy *wiphy) rtnl_unlock(); __count == 0; })); - rfkill_unregister(rdev->rfkill); + if (rdev->rfkill) + rfkill_unregister(rdev->rfkill); rtnl_lock(); rdev->wiphy.registered = false; -- cgit v0.10.2 From dfb6b7c109a7f98d324a759599d1b4616f02c79f Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 23 Sep 2013 04:08:13 +0200 Subject: Revert "rt2x00pci: Use PCI MSIs whenever possible" This reverts commit 9483f40d8d01918b399b4e24d0c1111db0afffeb. Some devices stop to connect with above commit, see: https://bugzilla.kernel.org/show_bug.cgi?id=61621 Since there is no clear benefit of having MSI enabled, just revert change to fix the problem. Cc: stable@vger.kernel.org # 3.11+ Signed-off-by: Stanislaw Gruszka Acked-by: Jakub Kicinski Acked-by: Gertjan van Wingerde Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/rt2x00/rt2x00pci.c b/drivers/net/wireless/rt2x00/rt2x00pci.c index 76d95de..dc49e52 100644 --- a/drivers/net/wireless/rt2x00/rt2x00pci.c +++ b/drivers/net/wireless/rt2x00/rt2x00pci.c @@ -105,13 +105,11 @@ int rt2x00pci_probe(struct pci_dev *pci_dev, const struct rt2x00_ops *ops) goto exit_release_regions; } - pci_enable_msi(pci_dev); - hw = ieee80211_alloc_hw(sizeof(struct rt2x00_dev), ops->hw); if (!hw) { rt2x00_probe_err("Failed to allocate hardware\n"); retval = -ENOMEM; - goto exit_disable_msi; + goto exit_release_regions; } pci_set_drvdata(pci_dev, hw); @@ -152,9 +150,6 @@ exit_free_reg: exit_free_device: ieee80211_free_hw(hw); -exit_disable_msi: - pci_disable_msi(pci_dev); - exit_release_regions: pci_release_regions(pci_dev); @@ -179,8 +174,6 @@ void rt2x00pci_remove(struct pci_dev *pci_dev) rt2x00pci_free_reg(rt2x00dev); ieee80211_free_hw(hw); - pci_disable_msi(pci_dev); - /* * Free the PCI device data. */ -- cgit v0.10.2 From 453b0c3f6910672f79da354077af728d92f95c5b Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Fri, 27 Sep 2013 10:55:38 -0700 Subject: mwifiex: fix SDIO interrupt lost issue 601216e "mwifiex: process RX packets in SDIO IRQ thread directly" introduced a command timeout issue which can be reproduced easily on an AM33xx platform using a test application written by Daniel Mack: https://gist.github.com/zonque/6579314 mwifiex_main_process() is called from both the SDIO handler and the workqueue. In case an interrupt occurs right after the int_status check, but before updating the mwifiex_processing flag, this interrupt gets lost, resulting in a command timeout and consequently a card reset. Let main_proc_lock protect both int_status and mwifiex_processing flag. 
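This is the classic lost-event window: a pending flag is tested and a busy marker cleared as two separate steps, and an interrupt that fires in between sees the stale marker and does not reschedule the work. A minimal kernel-style sketch of the closed window, with hypothetical names (the driver itself uses adapter->main_proc_lock, adapter->int_status and adapter->mwifiex_processing; assumes <linux/spinlock.h> and <linux/types.h>):

        struct my_adapter {                /* hypothetical, illustration only */
                spinlock_t proc_lock;      /* taken by worker and IRQ handler */
                u32 int_status;            /* set by the IRQ handler */
                bool processing;           /* "main loop is running" marker */
        };

        /* The re-check of int_status and the clearing of the processing flag
         * happen under one lock: an IRQ handler that sets int_status either
         * sees processing == true (the worker will loop again) or runs after
         * processing was cleared and schedules the work itself.
         */
        static bool try_go_idle(struct my_adapter *ad)
        {
                unsigned long flags;
                bool idle = false;

                spin_lock_irqsave(&ad->proc_lock, flags);
                if (!ad->int_status) {
                        ad->processing = false;
                        idle = true;
                }
                spin_unlock_irqrestore(&ad->proc_lock, flags);

                return idle;
        }
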
This fixes the interrupt lost issue. Cc: # 3.7+ Reported-by: Sven Neumann Reported-by: Andreas Fenkart Tested-by: Daniel Mack Reviewed-by: Dylan Reid Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: Paul Stewart Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c index fd77833..c2b91f5 100644 --- a/drivers/net/wireless/mwifiex/main.c +++ b/drivers/net/wireless/mwifiex/main.c @@ -358,10 +358,12 @@ process_start: } } while (true); - if ((adapter->int_status) || IS_CARD_RX_RCVD(adapter)) + spin_lock_irqsave(&adapter->main_proc_lock, flags); + if ((adapter->int_status) || IS_CARD_RX_RCVD(adapter)) { + spin_unlock_irqrestore(&adapter->main_proc_lock, flags); goto process_start; + } - spin_lock_irqsave(&adapter->main_proc_lock, flags); adapter->mwifiex_processing = false; spin_unlock_irqrestore(&adapter->main_proc_lock, flags); -- cgit v0.10.2 From f69727fd78fa761dc49ee3091c432a8c6ab81292 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 29 Sep 2013 13:06:31 +0200 Subject: ath9k: fix powersave response handling for BA session packets When a packet is passed from mac80211 to the driver with the IEEE80211_TX_CTL_PS_RESPONSE flag set, it bypasses the normal driver internal queueing and goes directly to the UAPSD queue. When that happens, packets that are part of a BlockAck session still need to be tracked as such inside the driver, otherwise it will create discrepancies in the receiver BA reorder window, causing traffic stalls. This only happens in AP mode with powersave-enabled clients. This patch fixes the regression introduced in the commit "ath9k: use software queues for un-aggregated data packets" Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 5ac713d..dd30452 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -1969,15 +1969,18 @@ static void ath_tx_txqaddbuf(struct ath_softc *sc, struct ath_txq *txq, static void ath_tx_send_normal(struct ath_softc *sc, struct ath_txq *txq, struct ath_atx_tid *tid, struct sk_buff *skb) { + struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); struct ath_frame_info *fi = get_frame_info(skb); struct list_head bf_head; - struct ath_buf *bf; - - bf = fi->bf; + struct ath_buf *bf = fi->bf; INIT_LIST_HEAD(&bf_head); list_add_tail(&bf->list, &bf_head); bf->bf_state.bf_type = 0; + if (tid && (tx_info->flags & IEEE80211_TX_CTL_AMPDU)) { + bf->bf_state.bf_type = BUF_AMPDU; + ath_tx_addto_baw(sc, tid, bf); + } bf->bf_next = NULL; bf->bf_lastbf = bf; -- cgit v0.10.2 From 6c519bad7b19a2c14a075b400edabaa630330123 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 27 Sep 2013 18:03:39 +0200 Subject: batman-adv: set up network coding packet handlers during module init batman-adv saves its table of packet handlers as a global state, so handlers must be set up only once (and setting them up a second time will fail). The recently-added network coding support tries to set up its handler each time a new softif is registered, which obviously fails when more that one softif is used (and in consequence, the softif creation fails). Fix this by splitting up batadv_nc_init into batadv_nc_init (which is called only once) and batadv_nc_mesh_init (which is called for each softif); in addition batadv_nc_free is renamed to batadv_nc_mesh_free to keep naming consistent. 
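Why the per-softif registration fails the second time, as a sketch with hypothetical names (the real helpers are batadv_recv_handler_register() and the batadv_nc_init()/batadv_nc_mesh_init() split shown in the diff below): the handler table is a single module-global array, so only the first registration for a packet type can succeed, and such setup therefore belongs in the module init path rather than in the per-interface path.

        /* Sketch: a module-global handler table.  A second softif trying to
         * register the same packet type hits the -EBUSY branch, which is why
         * registration must happen once at module init time.
         */
        static int (*example_handlers[EXAMPLE_PKT_MAX])(struct sk_buff *skb);

        static int example_recv_handler_register(unsigned int type,
                                int (*handler)(struct sk_buff *skb))
        {
                if (example_handlers[type])
                        return -EBUSY;          /* slot already taken */
                example_handlers[type] = handler;
                return 0;
        }
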
Signed-off-by: Matthias Schiffer Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index c72d1bc..1356af6 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -65,6 +65,7 @@ static int __init batadv_init(void) batadv_recv_handler_init(); batadv_iv_init(); + batadv_nc_init(); batadv_event_workqueue = create_singlethread_workqueue("bat_events"); @@ -142,7 +143,7 @@ int batadv_mesh_init(struct net_device *soft_iface) if (ret < 0) goto err; - ret = batadv_nc_init(bat_priv); + ret = batadv_nc_mesh_init(bat_priv); if (ret < 0) goto err; @@ -167,7 +168,7 @@ void batadv_mesh_free(struct net_device *soft_iface) batadv_vis_quit(bat_priv); batadv_gw_node_purge(bat_priv); - batadv_nc_free(bat_priv); + batadv_nc_mesh_free(bat_priv); batadv_dat_free(bat_priv); batadv_bla_free(bat_priv); diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index a487d46..4ecc0b6 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -35,6 +35,20 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); /** + * batadv_nc_init - one-time initialization for network coding + */ +int __init batadv_nc_init(void) +{ + int ret; + + /* Register our packet type */ + ret = batadv_recv_handler_register(BATADV_CODED, + batadv_nc_recv_coded_packet); + + return ret; +} + +/** * batadv_nc_start_timer - initialise the nc periodic worker * @bat_priv: the bat priv with all the soft interface information */ @@ -45,10 +59,10 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_nc_init - initialise coding hash table and start house keeping + * batadv_nc_mesh_init - initialise coding hash table and start house keeping * @bat_priv: the bat priv with all the soft interface information */ -int batadv_nc_init(struct batadv_priv *bat_priv) +int batadv_nc_mesh_init(struct batadv_priv *bat_priv) { bat_priv->nc.timestamp_fwd_flush = jiffies; bat_priv->nc.timestamp_sniffed_purge = jiffies; @@ -70,11 +84,6 @@ int batadv_nc_init(struct batadv_priv *bat_priv) batadv_hash_set_lock_class(bat_priv->nc.coding_hash, &batadv_nc_decoding_hash_lock_class_key); - /* Register our packet type */ - if (batadv_recv_handler_register(BATADV_CODED, - batadv_nc_recv_coded_packet) < 0) - goto err; - INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker); batadv_nc_start_timer(bat_priv); @@ -1721,12 +1730,11 @@ free_nc_packet: } /** - * batadv_nc_free - clean up network coding memory + * batadv_nc_mesh_free - clean up network coding memory * @bat_priv: the bat priv with all the soft interface information */ -void batadv_nc_free(struct batadv_priv *bat_priv) +void batadv_nc_mesh_free(struct batadv_priv *bat_priv) { - batadv_recv_handler_unregister(BATADV_CODED); cancel_delayed_work_sync(&bat_priv->nc.work); batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL); diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 85a4ec8..ddfa618 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -22,8 +22,9 @@ #ifdef CONFIG_BATMAN_ADV_NC -int batadv_nc_init(struct batadv_priv *bat_priv); -void batadv_nc_free(struct batadv_priv *bat_priv); +int batadv_nc_init(void); +int batadv_nc_mesh_init(struct batadv_priv *bat_priv); +void batadv_nc_mesh_free(struct batadv_priv *bat_priv); void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_orig_node 
*orig_neigh_node, @@ -46,12 +47,17 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv); #else /* ifdef CONFIG_BATMAN_ADV_NC */ -static inline int batadv_nc_init(struct batadv_priv *bat_priv) +static inline int batadv_nc_init(void) { return 0; } -static inline void batadv_nc_free(struct batadv_priv *bat_priv) +static inline int batadv_nc_mesh_init(struct batadv_priv *bat_priv) +{ + return 0; +} + +static inline void batadv_nc_mesh_free(struct batadv_priv *bat_priv) { return; } -- cgit v0.10.2 From e727ca82e0e9616ab4844301e6bae60ca7327682 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 30 Sep 2013 22:03:06 +0200 Subject: proc connector: fix info leaks Initialize event_data for all possible message types to prevent leaking kernel stack contents to userland (up to 20 bytes). Also set the flags member of the connector message to 0 to prevent leaking two more stack bytes this way. Cc: stable@vger.kernel.org # v2.6.15+ Signed-off-by: Mathias Krause Signed-off-by: David S. Miller diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 08ae128..c73fc2b 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -65,6 +65,7 @@ void proc_fork_connector(struct task_struct *task) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -80,6 +81,7 @@ void proc_fork_connector(struct task_struct *task) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ /* If cn_netlink_send() failed, the data is not sent */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -96,6 +98,7 @@ void proc_exec_connector(struct task_struct *task) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -106,6 +109,7 @@ void proc_exec_connector(struct task_struct *task) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -122,6 +126,7 @@ void proc_id_connector(struct task_struct *task, int which_id) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); ev->what = which_id; ev->event_data.id.process_pid = task->pid; ev->event_data.id.process_tgid = task->tgid; @@ -145,6 +150,7 @@ void proc_id_connector(struct task_struct *task, int which_id) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -160,6 +166,7 @@ void proc_sid_connector(struct task_struct *task) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -170,6 +177,7 @@ void proc_sid_connector(struct task_struct *task) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 
0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -185,6 +193,7 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -203,6 +212,7 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -218,6 +228,7 @@ void proc_comm_connector(struct task_struct *task) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -229,6 +240,7 @@ void proc_comm_connector(struct task_struct *task) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -244,6 +256,7 @@ void proc_coredump_connector(struct task_struct *task) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -254,6 +267,7 @@ void proc_coredump_connector(struct task_struct *task) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -269,6 +283,7 @@ void proc_exit_connector(struct task_struct *task) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -281,6 +296,7 @@ void proc_exit_connector(struct task_struct *task) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } @@ -304,6 +320,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) msg = (struct cn_msg *)buffer; ev = (struct proc_event *)msg->data; + memset(&ev->event_data, 0, sizeof(ev->event_data)); msg->seq = rcvd_seq; ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); @@ -313,6 +330,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = rcvd_ack + 1; msg->len = sizeof(*ev); + msg->flags = 0; /* not used */ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); } -- cgit v0.10.2 From 162b2bedc084d2d908a04c93383ba02348b648b0 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 30 Sep 2013 22:03:07 +0200 Subject: connector: use nlmsg_len() to check message length The current code tests the length of the whole netlink message to be at least as long to fit a cn_msg. This is wrong as nlmsg_len includes the length of the netlink message header. 
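For reference, nlmsg_len() in <net/netlink.h> returns nlh->nlmsg_len minus NLMSG_HDRLEN, i.e. the payload length with the netlink header excluded. A hedged sketch of the corrected bounds check (not the exact driver code; the helper name is made up):

        /* nlh->nlmsg_len counts the netlink header as well, so comparing it
         * against sizeof(struct cn_msg) is off by NLMSG_HDRLEN bytes.
         * nlmsg_len(nlh) is the payload-only length and is what must be
         * checked against the connector limits.
         */
        static bool cn_msg_len_ok(const struct nlmsghdr *nlh)
        {
                int len = nlmsg_len(nlh);   /* == nlh->nlmsg_len - NLMSG_HDRLEN */

                return len >= (int)sizeof(struct cn_msg) &&
                       len <= CONNECTOR_MAX_MSG_SIZE;
        }
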
Use nlmsg_len() instead to fix this "off-by-NLMSG_HDRLEN" size check. Cc: stable@vger.kernel.org # v2.6.14+ Signed-off-by: Mathias Krause Signed-off-by: David S. Miller diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 6ecfa75..0daa11e 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -157,17 +157,18 @@ static int cn_call_callback(struct sk_buff *skb) static void cn_rx_skb(struct sk_buff *__skb) { struct nlmsghdr *nlh; - int err; struct sk_buff *skb; + int len, err; skb = skb_get(__skb); if (skb->len >= NLMSG_HDRLEN) { nlh = nlmsg_hdr(skb); + len = nlmsg_len(nlh); - if (nlh->nlmsg_len < sizeof(struct cn_msg) || + if (len < (int)sizeof(struct cn_msg) || skb->len < nlh->nlmsg_len || - nlh->nlmsg_len > CONNECTOR_MAX_MSG_SIZE) { + len > CONNECTOR_MAX_MSG_SIZE) { kfree_skb(skb); return; } -- cgit v0.10.2 From ac73bf50b709dea4e39635151b861b8a8f52bfbb Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 30 Sep 2013 22:03:08 +0200 Subject: connector: use 'size' everywhere in cn_netlink_send() We calculated the size for the netlink message buffer as size. Use size in the memcpy() call as well instead of recalculating it. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 0daa11e..a36749f 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -109,7 +109,7 @@ int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask) data = nlmsg_data(nlh); - memcpy(data, msg, sizeof(*data) + msg->len); + memcpy(data, msg, size); NETLINK_CB(skb).dst_group = group; -- cgit v0.10.2 From 05742faf2512b94907f49b21581ed4667b9bd702 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 30 Sep 2013 22:03:09 +0200 Subject: connector - documentation: simplify netlink message length assignment Use the precalculated size instead of obfuscating the message length calculation by first subtracting the netlink header length from size and then use the NLMSG_LENGTH() macro to add it back again. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller diff --git a/Documentation/connector/ucon.c b/Documentation/connector/ucon.c index 4848db8..8a4da64 100644 --- a/Documentation/connector/ucon.c +++ b/Documentation/connector/ucon.c @@ -71,7 +71,7 @@ static int netlink_send(int s, struct cn_msg *msg) nlh->nlmsg_seq = seq++; nlh->nlmsg_pid = getpid(); nlh->nlmsg_type = NLMSG_DONE; - nlh->nlmsg_len = NLMSG_LENGTH(size - sizeof(*nlh)); + nlh->nlmsg_len = size; nlh->nlmsg_flags = 0; m = NLMSG_DATA(nlh); -- cgit v0.10.2 From 6865d1e834be84ddd5808d93d5035b492346c64a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 30 Sep 2013 22:05:40 +0200 Subject: unix_diag: fix info leak When filling the netlink message we miss to wipe the pad field, therefore leak one byte of heap memory to userland. Fix this by setting pad to 0. Signed-off-by: Mathias Krause Signed-off-by: David S. 
Miller diff --git a/net/unix/diag.c b/net/unix/diag.c index d591091..86fa0f3 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -124,6 +124,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r rep->udiag_family = AF_UNIX; rep->udiag_type = sk->sk_type; rep->udiag_state = sk->sk_state; + rep->pad = 0; rep->udiag_ino = sk_ino; sock_diag_save_cookie(sk, rep->udiag_cookie); -- cgit v0.10.2 From 8c1c58ec70ce45cd5c7866811e90df131e7a3005 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 30 Sep 2013 15:12:15 -0500 Subject: net: calxedaxgmac: fix clearing of old filter addresses In commit 2ee68f621af280 (net: calxedaxgmac: fix various errors in xgmac_set_rx_mode), a fix to clean-up old address entries was added. However, the loop to zero out the entries failed to increment the register address resulting in only 1 entry getting cleared. Fix this to correctly use the loop index. Also, the end of the loop condition was off by 1 and should have been <= rather than <. Signed-off-by: Rob Herring Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 78d6d6b..94358d2 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1342,8 +1342,8 @@ static void xgmac_set_rx_mode(struct net_device *dev) } out: - for (i = reg; i < XGMAC_MAX_FILTER_ADDR; i++) - xgmac_set_mac_addr(ioaddr, NULL, reg); + for (i = reg; i <= XGMAC_MAX_FILTER_ADDR; i++) + xgmac_set_mac_addr(ioaddr, NULL, i); for (i = 0; i < XGMAC_NUM_HASH; i++) writel(hash_filter[i], ioaddr + XGMAC_HASH(i)); -- cgit v0.10.2 From 8a8c3f5bebe2455200cd488aea8546d01645b06b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 30 Sep 2013 15:12:16 -0500 Subject: net: calxedaxgmac: add uc and mc filter addresses in promiscuous mode Even in promiscuous mode, we need to add filter addresses for correct operation. This fixes silent failures when using a bridge and adding addresses using the "bridge fdb add" command. Signed-off-by: Rob Herring Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 94358d2..35da09b 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1291,10 +1291,8 @@ static void xgmac_set_rx_mode(struct net_device *dev) netdev_dbg(priv->dev, "# mcasts %d, # unicast %d\n", netdev_mc_count(dev), netdev_uc_count(dev)); - if (dev->flags & IFF_PROMISC) { - writel(XGMAC_FRAME_FILTER_PR, ioaddr + XGMAC_FRAME_FILTER); - return; - } + if (dev->flags & IFF_PROMISC) + value |= XGMAC_FRAME_FILTER_PR; memset(hash_filter, 0, sizeof(hash_filter)); -- cgit v0.10.2 From 0cf2f380073867e5d0cfb526fdf9bee0e7f8a9c6 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 30 Sep 2013 15:12:17 -0500 Subject: net: calxedaxgmac: determine number of address filters at runtime Highbank and Midway xgmac h/w have different number of MAC address filter registers with 7 and 31, respectively. Highbank has been wrong, so fix it and detect the number of filter registers at run-time. Unfortunately, the version register is the same on both SOCs, so simply test if write to the last filter register will take a value. It always reads as 0 if not. Signed-off-by: Rob Herring Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 35da09b..48f5288 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -106,7 +106,6 @@ #define XGMAC_DMA_HW_FEATURE 0x00000f58 /* Enabled Hardware Features */ #define XGMAC_ADDR_AE 0x80000000 -#define XGMAC_MAX_FILTER_ADDR 31 /* PMT Control and Status */ #define XGMAC_PMT_POINTER_RESET 0x80000000 @@ -384,6 +383,7 @@ struct xgmac_priv { struct device *device; struct napi_struct napi; + int max_macs; struct xgmac_extra_stats xstats; spinlock_t stats_lock; @@ -1296,7 +1296,7 @@ static void xgmac_set_rx_mode(struct net_device *dev) memset(hash_filter, 0, sizeof(hash_filter)); - if (netdev_uc_count(dev) > XGMAC_MAX_FILTER_ADDR) { + if (netdev_uc_count(dev) > priv->max_macs) { use_hash = true; value |= XGMAC_FRAME_FILTER_HUC | XGMAC_FRAME_FILTER_HPF; } @@ -1319,7 +1319,7 @@ static void xgmac_set_rx_mode(struct net_device *dev) goto out; } - if ((netdev_mc_count(dev) + reg - 1) > XGMAC_MAX_FILTER_ADDR) { + if ((netdev_mc_count(dev) + reg - 1) > priv->max_macs) { use_hash = true; value |= XGMAC_FRAME_FILTER_HMC | XGMAC_FRAME_FILTER_HPF; } else { @@ -1340,7 +1340,7 @@ static void xgmac_set_rx_mode(struct net_device *dev) } out: - for (i = reg; i <= XGMAC_MAX_FILTER_ADDR; i++) + for (i = reg; i <= priv->max_macs; i++) xgmac_set_mac_addr(ioaddr, NULL, i); for (i = 0; i < XGMAC_NUM_HASH; i++) writel(hash_filter[i], ioaddr + XGMAC_HASH(i)); @@ -1759,6 +1759,13 @@ static int xgmac_probe(struct platform_device *pdev) uid = readl(priv->base + XGMAC_VERSION); netdev_info(ndev, "h/w version is 0x%x\n", uid); + /* Figure out how many valid mac address filter registers we have */ + writel(1, priv->base + XGMAC_ADDR_HIGH(31)); + if (readl(priv->base + XGMAC_ADDR_HIGH(31)) == 1) + priv->max_macs = 31; + else + priv->max_macs = 7; + writel(0, priv->base + XGMAC_DMA_INTR_ENA); ndev->irq = platform_get_irq(pdev, 0); if (ndev->irq == -ENXIO) { -- cgit v0.10.2 From 58e4e1f6cacddb7823c44bcfb272174553f6c645 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 30 Sep 2013 13:29:08 -0700 Subject: igb: Avoid uninitialized advertised variable in eee_set_cur eee_get_cur assumes that the output data is already zeroed. It can read-modify-write the advertised field: if (ipcnfg & E1000_IPCNFG_EEE_100M_AN) 2594 edata->advertised |= ADVERTISED_100baseT_Full; This is ok for the normal ethtool eee_get call, which always zeroes the input data before. But eee_set_cur also calls eee_get_cur and it did not zero the input field. Later on it then compares agsinst the field, which can contain partial stack garbage. Zero the input field in eee_set_cur() too. Cc: jeffrey.t.kirsher@intel.com Cc: netdev@vger.kernel.org Signed-off-by: Andi Kleen Acked-by: Jeff Kirsher Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 86d5142..151e00c 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2655,6 +2655,8 @@ static int igb_set_eee(struct net_device *netdev, (hw->phy.media_type != e1000_media_type_copper)) return -EOPNOTSUPP; + memset(&eee_curr, 0, sizeof(struct ethtool_eee)); + ret_val = igb_get_eee(netdev, &eee_curr); if (ret_val) return ret_val; -- cgit v0.10.2 From 5843ef421311c34f06d5ab82bced5aebfe185b2c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 30 Sep 2013 13:29:11 -0700 Subject: tcp: Always set options to 0 before calling tcp_established_options tcp_established_options assumes opts->options is 0 before calling, as it read modify writes it. For the tcp_current_mss() case the opts structure is not zeroed, so this can be done with uninitialized values. This is ok, because ->options is not read in this path. But it's still better to avoid the operation on the uninitialized field. This shuts up a static code analyzer, and presumably may help the optimizer. Cc: netdev@vger.kernel.org Signed-off-by: Andi Kleen Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e6bb825..c6f01f2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -637,6 +637,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb unsigned int size = 0; unsigned int eff_sacks; + opts->options = 0; + #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); if (unlikely(*md5)) { -- cgit v0.10.2 From 5bc3db5c9ca8407f52918b6504d3b27230defedc Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 30 Sep 2013 21:30:22 -0700 Subject: tc: export tc_defact.h to userspace Jamal sent patch to add tc user simple actions to iproute2 but required header was not being exported. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller diff --git a/include/linux/tc_act/tc_defact.h b/include/linux/tc_act/tc_defact.h deleted file mode 100644 index 6f65d07..0000000 --- a/include/linux/tc_act/tc_defact.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef __LINUX_TC_DEF_H -#define __LINUX_TC_DEF_H - -#include - -struct tc_defact { - tc_gen; -}; - -enum { - TCA_DEF_UNSPEC, - TCA_DEF_TM, - TCA_DEF_PARMS, - TCA_DEF_DATA, - __TCA_DEF_MAX -}; -#define TCA_DEF_MAX (__TCA_DEF_MAX - 1) - -#endif diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild index 0623ec4..56f1216 100644 --- a/include/uapi/linux/tc_act/Kbuild +++ b/include/uapi/linux/tc_act/Kbuild @@ -1,5 +1,6 @@ # UAPI Header export list header-y += tc_csum.h +header-y += tc_defact.h header-y += tc_gact.h header-y += tc_ipt.h header-y += tc_mirred.h diff --git a/include/uapi/linux/tc_act/tc_defact.h b/include/uapi/linux/tc_act/tc_defact.h new file mode 100644 index 0000000..17dddb4 --- /dev/null +++ b/include/uapi/linux/tc_act/tc_defact.h @@ -0,0 +1,19 @@ +#ifndef __LINUX_TC_DEF_H +#define __LINUX_TC_DEF_H + +#include + +struct tc_defact { + tc_gen; +}; + +enum { + TCA_DEF_UNSPEC, + TCA_DEF_TM, + TCA_DEF_PARMS, + TCA_DEF_DATA, + __TCA_DEF_MAX +}; +#define TCA_DEF_MAX (__TCA_DEF_MAX - 1) + +#endif -- cgit v0.10.2 From 28ad7b06f4a7670d5794d3751f5c68a7e5e437a8 Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 1 Oct 2013 21:23:25 -0400 Subject: bonding: update MAINTAINERS Veaceslav has been doing a significant amount of work on bonding lately and reached out to me about being a maintainer. 
After discussing this with him, I think he would be a good fit as a bonding maintainer. Signed-off-by: Andy Gospodarek Acked-by: Veaceslav Falico Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index 744a239..43dfc82 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1785,6 +1785,7 @@ F: include/net/bluetooth/ BONDING DRIVER M: Jay Vosburgh +M: Veaceslav Falico M: Andy Gospodarek L: netdev@vger.kernel.org W: http://sourceforge.net/projects/bonding/ -- cgit v0.10.2 From 80ad1d61e72d626e30ebe8529a0455e660ca4693 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Oct 2013 21:04:11 -0700 Subject: net: do not call sock_put() on TIMEWAIT sockets commit 3ab5aee7fe84 ("net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls") incorrectly used sock_put() on TIMEWAIT sockets. We should instead use inet_twsk_put() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 7bd8983..96da9c7 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -287,7 +287,7 @@ begintw: if (unlikely(!INET_TW_MATCH(sk, net, acookie, saddr, daddr, ports, dif))) { - sock_put(sk); + inet_twsk_put(inet_twsk(sk)); goto begintw; } goto out; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 32b4a16..066640e 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -116,7 +116,7 @@ begintw: } if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, ports, dif))) { - sock_put(sk); + inet_twsk_put(inet_twsk(sk)); goto begintw; } goto out; -- cgit v0.10.2 From e18503f41f9b12132c95d7c31ca6ee5155e44e5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Cachereul?= Date: Wed, 2 Oct 2013 10:16:02 +0200 Subject: l2tp: fix kernel panic when using IPv4-mapped IPv6 addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IPv4 mapped addresses cause kernel panic. The patch juste check whether the IPv6 address is an IPv4 mapped address. If so, use IPv4 API instead of IPv6. [ 940.026915] general protection fault: 0000 [#1] [ 940.026915] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core pppox ppp_generic slhc loop psmouse [ 940.026915] CPU: 0 PID: 3184 Comm: memcheck-amd64- Not tainted 3.11.0+ #1 [ 940.026915] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 940.026915] task: ffff880007130e20 ti: ffff88000737e000 task.ti: ffff88000737e000 [ 940.026915] RIP: 0010:[] [] ip6_xmit+0x276/0x326 [ 940.026915] RSP: 0018:ffff88000737fd28 EFLAGS: 00010286 [ 940.026915] RAX: c748521a75ceff48 RBX: ffff880000c30800 RCX: 0000000000000000 [ 940.026915] RDX: ffff88000075cc4e RSI: 0000000000000028 RDI: ffff8800060e5a40 [ 940.026915] RBP: ffff8800060e5a40 R08: 0000000000000000 R09: ffff88000075cc90 [ 940.026915] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88000737fda0 [ 940.026915] R13: 0000000000000000 R14: 0000000000002000 R15: ffff880005d3b580 [ 940.026915] FS: 00007f163dc5e800(0000) GS:ffffffff81623000(0000) knlGS:0000000000000000 [ 940.026915] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 940.026915] CR2: 00000004032dc940 CR3: 0000000005c25000 CR4: 00000000000006f0 [ 940.026915] Stack: [ 940.026915] ffff88000075cc4e ffffffff81694e90 ffff880000c30b38 0000000000000020 [ 940.026915] 11000000523c4bac ffff88000737fdb4 0000000000000000 ffff880000c30800 [ 940.026915] ffff880005d3b580 ffff880000c30b38 ffff8800060e5a40 0000000000000020 [ 940.026915] Call Trace: [ 940.026915] [] ? 
inet6_csk_xmit+0xa4/0xc4 [ 940.026915] [] ? l2tp_xmit_skb+0x503/0x55a [l2tp_core] [ 940.026915] [] ? pskb_expand_head+0x161/0x214 [ 940.026915] [] ? pppol2tp_xmit+0xf2/0x143 [l2tp_ppp] [ 940.026915] [] ? ppp_channel_push+0x36/0x8b [ppp_generic] [ 940.026915] [] ? ppp_write+0xaf/0xc5 [ppp_generic] [ 940.026915] [] ? vfs_write+0xa2/0x106 [ 940.026915] [] ? SyS_write+0x56/0x8a [ 940.026915] [] ? system_call_fastpath+0x16/0x1b [ 940.026915] Code: 00 49 8b 8f d8 00 00 00 66 83 7c 11 02 00 74 60 49 8b 47 58 48 83 e0 fe 48 8b 80 18 01 00 00 48 85 c0 74 13 48 8b 80 78 02 00 00 <48> ff 40 28 41 8b 57 68 48 01 50 30 48 8b 54 24 08 49 c7 c1 51 [ 940.026915] RIP [] ip6_xmit+0x276/0x326 [ 940.026915] RSP [ 940.057945] ---[ end trace be8aba9a61c8b7f3 ]--- [ 940.058583] Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: François CACHEREUL Signed-off-by: David S. Miller diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index feae495..aedaa2c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -496,6 +496,7 @@ out: static inline int l2tp_verify_udp_checksum(struct sock *sk, struct sk_buff *skb) { + struct l2tp_tunnel *tunnel = (struct l2tp_tunnel *)sk->sk_user_data; struct udphdr *uh = udp_hdr(skb); u16 ulen = ntohs(uh->len); __wsum psum; @@ -504,7 +505,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, return 0; #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) { + if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) { if (!uh->check) { LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); return 1; @@ -1128,7 +1129,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, /* Queue the packet to IP for output */ skb->local_df = 1; #if IS_ENABLED(CONFIG_IPV6) - if (skb->sk->sk_family == PF_INET6) + if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped) error = inet6_csk_xmit(skb, NULL); else #endif @@ -1255,7 +1256,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len /* Calculate UDP checksum if configured to do so */ #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) + if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) l2tp_xmit_ipv6_csum(sk, skb, udp_len); else #endif @@ -1704,6 +1705,24 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 if (cfg != NULL) tunnel->debug = cfg->debug; +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + if (ipv6_addr_v4mapped(&np->saddr) && + ipv6_addr_v4mapped(&np->daddr)) { + struct inet_sock *inet = inet_sk(sk); + + tunnel->v4mapped = true; + inet->inet_saddr = np->saddr.s6_addr32[3]; + inet->inet_rcv_saddr = np->rcv_saddr.s6_addr32[3]; + inet->inet_daddr = np->daddr.s6_addr32[3]; + } else { + tunnel->v4mapped = false; + } + } +#endif + /* Mark socket as an encapsulation socket. 
See net/ipv4/udp.c */ tunnel->encap = encap; if (encap == L2TP_ENCAPTYPE_UDP) { @@ -1712,7 +1731,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) + if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) udpv6_encap_enable(); else #endif diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 66a559b..6f251cb 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -194,6 +194,9 @@ struct l2tp_tunnel { struct sock *sock; /* Parent socket */ int fd; /* Parent fd, if tunnel socket * was created by userspace */ +#if IS_ENABLED(CONFIG_IPV6) + bool v4mapped; +#endif struct work_struct del_work; -- cgit v0.10.2 From 041b4ddb84989f06ff1df0ca869b950f1ee3cb1c Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Wed, 2 Oct 2013 12:57:20 +0200 Subject: net: mv643xx_eth: update statistics timer from timer context only Each port driver installs a periodic timer to update port statistics by calling mib_counters_update. As mib_counters_update is also called from non-timer context, we should not reschedule the timer there but rather move it to timer-only context. Signed-off-by: Sebastian Hesselbarth Acked-by: Jason Cooper Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 7fb5677..44a87e4 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -1131,15 +1131,13 @@ static void mib_counters_update(struct mv643xx_eth_private *mp) p->rx_discard += rdlp(mp, RX_DISCARD_FRAME_CNT); p->rx_overrun += rdlp(mp, RX_OVERRUN_FRAME_CNT); spin_unlock_bh(&mp->mib_counters_lock); - - mod_timer(&mp->mib_counters_timer, jiffies + 30 * HZ); } static void mib_counters_timer_wrapper(unsigned long _mp) { struct mv643xx_eth_private *mp = (void *)_mp; - mib_counters_update(mp); + mod_timer(&mp->mib_counters_timer, jiffies + 30 * HZ); } -- cgit v0.10.2 From f564412c935111c583b787bcc18157377b208e2e Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Wed, 2 Oct 2013 12:57:21 +0200 Subject: net: mv643xx_eth: fix orphaned statistics timer crash The periodic statistics timer gets started at port _probe() time, but is stopped on _stop() only. In a modular environment, this can cause the timer to access already deallocated memory, if the module is unloaded without starting the eth device. To fix this, we add the timer right before the port is started, instead of at _probe() time. Signed-off-by: Sebastian Hesselbarth Acked-by: Jason Cooper Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 44a87e4..2364707 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2235,6 +2235,7 @@ static int mv643xx_eth_open(struct net_device *dev) mp->int_mask |= INT_TX_END_0 << i; } + add_timer(&mp->mib_counters_timer); port_start(mp); wrlp(mp, INT_MASK_EXT, INT_EXT_LINK_PHY | INT_EXT_TX); @@ -2914,7 +2915,6 @@ static int mv643xx_eth_probe(struct platform_device *pdev) mp->mib_counters_timer.data = (unsigned long)mp; mp->mib_counters_timer.function = mib_counters_timer_wrapper; mp->mib_counters_timer.expires = jiffies + 30 * HZ; - add_timer(&mp->mib_counters_timer); spin_lock_init(&mp->mib_counters_lock); -- cgit v0.10.2 From b5d82db83c1d0b327b599508a0830d25cc1f15db Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Wed, 2 Oct 2013 12:57:22 +0200 Subject: net: mv643xx_eth: fix missing device_node for port devices DT-based mv643xx_eth probes and creates platform_devices for the port devices on its own. To allow fixups for ports based on the device_node, we need to set .of_node of the corresponding device with the correct node. Signed-off-by: Sebastian Hesselbarth Acked-by: Jason Cooper Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 2364707..2c210ec 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2533,6 +2533,7 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, if (!ppdev) return -ENOMEM; ppdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); + ppdev->dev.of_node = pnp; ret = platform_device_add_resources(ppdev, &res, 1); if (ret) -- cgit v0.10.2 From 1661bf364ae9c506bc8795fef70d1532931be1e8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 3 Oct 2013 00:27:20 +0300 Subject: net: heap overflow in __audit_sockaddr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to cap ->msg_namelen or it leads to a buffer overflow when we to the memcpy() in __audit_sockaddr(). It requires CAP_AUDIT_CONTROL to exploit this bug. The call tree is: ___sys_recvmsg() move_addr_to_user() audit_sockaddr() __audit_sockaddr() Reported-by: Jüri Aedla Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller diff --git a/net/compat.c b/net/compat.c index f0a1ba6..8903258 100644 --- a/net/compat.c +++ b/net/compat.c @@ -71,6 +71,8 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) __get_user(kmsg->msg_controllen, &umsg->msg_controllen) || __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) + return -EINVAL; kmsg->msg_name = compat_ptr(tmp1); kmsg->msg_iov = compat_ptr(tmp2); kmsg->msg_control = compat_ptr(tmp3); diff --git a/net/socket.c b/net/socket.c index ebed4b6..c226ace 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1964,6 +1964,16 @@ struct used_address { unsigned int name_len; }; +static int copy_msghdr_from_user(struct msghdr *kmsg, + struct msghdr __user *umsg) +{ + if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) + return -EFAULT; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) + return -EINVAL; + return 0; +} + static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, struct msghdr *msg_sys, unsigned int flags, struct used_address *used_address) @@ -1982,8 +1992,11 @@ static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) - return -EFAULT; + } else { + err = copy_msghdr_from_user(msg_sys, msg); + if (err) + return err; + } if (msg_sys->msg_iovlen > UIO_FASTIOV) { err = -EMSGSIZE; @@ -2191,8 +2204,11 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) - return -EFAULT; + } else { + err = copy_msghdr_from_user(msg_sys, msg); + if (err) + return err; + } if (msg_sys->msg_iovlen > UIO_FASTIOV) { err = -EMSGSIZE; -- cgit v0.10.2 From 5e8a402f831dbe7ee831340a91439e46f0d38acd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Oct 2013 10:31:41 -0700 Subject: tcp: do not forget FIN in tcp_shifted_skb() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Yuchung found following problem : There are bugs in the SACK processing code, merging part in tcp_shift_skb_data(), that incorrectly resets or ignores the sacked skbs FIN flag. When a receiver first SACK the FIN sequence, and later throw away ofo queue (e.g., sack-reneging), the sender will stop retransmitting the FIN flag, and hangs forever. Following packetdrill test can be used to reproduce the bug. $ cat sack-merge-bug.pkt `sysctl -q net.ipv4.tcp_fack=0` // Establish a connection and send 10 MSS. 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +.000 bind(3, ..., ...) = 0 +.000 listen(3, 1) = 0 +.050 < S 0:0(0) win 32792 +.000 > S. 0:0(0) ack 1 +.001 < . 1:1(0) ack 1 win 1024 +.000 accept(3, ..., ...) = 4 +.100 write(4, ..., 12000) = 12000 +.000 shutdown(4, SHUT_WR) = 0 +.000 > . 1:10001(10000) ack 1 +.050 < . 1:1(0) ack 2001 win 257 +.000 > FP. 10001:12001(2000) ack 1 +.050 < . 1:1(0) ack 2001 win 257 +.050 < . 1:1(0) ack 2001 win 257 // SACK reneg +.050 < . 1:1(0) ack 12001 win 257 +0 %{ print "unacked: ",tcpi_unacked }% +5 %{ print "" }% First, a typo inverted left/right of one OR operation, then code forgot to advance end_seq if the merged skb carried FIN. 
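A worked reading of the trace above (illustrative): the 12000-byte write plus shutdown() places the FIN at sequence 12001, so the segment "FP. 10001:12001(2000)" really spans

        seq     = 10001, data len = 2000, FIN
        end_seq = 10001 + 2000 + 1 = 12002    (the FIN consumes one sequence number)

and the final "ack 12001" acknowledges all of the data but not the FIN, so with correct accounting one segment (the FIN) remains unacknowledged. With the buggy merge, the surviving skb neither keeps TCPHDR_FIN nor has its end_seq advanced to 12002, so later retransmissions never carry the FIN and the sender hangs.
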
Bug was added in 2.6.29 by commit 832d11c5cd076ab ("tcp: Try to restore large SKBs while SACK processing") Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Cc: Ilpo Järvinen Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 25a89ea..113dc5f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1284,7 +1284,10 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tp->lost_cnt_hint -= tcp_skb_pcount(prev); } - TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags; + TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + TCP_SKB_CB(prev)->end_seq++; + if (skb == tcp_highest_sack(sk)) tcp_advance_highest_sack(sk, skb); -- cgit v0.10.2 From d546a89362dcc35bc1aacf0e8e3b0da5a7d2dc89 Mon Sep 17 00:00:00 2001 From: Jon Cooper Date: Fri, 27 Sep 2013 18:26:30 +0100 Subject: sfc: Add rmb() between reading stats and generation count to ensure consistency Signed-off-by: Ben Hutchings diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 9f18ae9..a4fbb38 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -545,6 +545,7 @@ static int efx_ef10_try_update_nic_stats(struct efx_nic *efx) rmb(); efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, stats_mask, stats, efx->stats_buffer.addr, false); + rmb(); generation_start = dma_stats[MC_CMD_MAC_GENERATION_START]; if (generation_end != generation_start) return -EAGAIN; -- cgit v0.10.2 From 87648cc925222f16c3bd119af8ae8e8fb2f3fe1d Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 25 Sep 2013 17:34:12 +0100 Subject: sfc: Fix internal indices of ethtool stats for EF10 The indices in nic_data->stats need to match the EF10_STAT_whatever enum values. In efx_nic_update_stats, only mask; gaps are removed in efx_ef10_update_stats. Signed-off-by: Ben Hutchings diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index e7dbd2d..9826594 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -469,8 +469,7 @@ size_t efx_nic_describe_stats(const struct efx_hw_stat_desc *desc, size_t count, * @count: Length of the @desc array * @mask: Bitmask of which elements of @desc are enabled * @stats: Buffer to update with the converted statistics. The length - * of this array must be at least the number of set bits in the - * first @count bits of @mask. + * of this array must be at least @count. * @dma_buf: DMA buffer containing hardware statistics * @accumulate: If set, the converted values will be added rather than * directly stored to the corresponding elements of @stats @@ -503,11 +502,9 @@ void efx_nic_update_stats(const struct efx_hw_stat_desc *desc, size_t count, } if (accumulate) - *stats += val; + stats[index] += val; else - *stats = val; + stats[index] = val; } - - ++stats; } } -- cgit v0.10.2 From 4bae913bd372aab90fdbd91e7e5fb841a2a0dc15 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 27 Sep 2013 18:52:49 +0100 Subject: sfc: Refactor EF10 stat mask code to allow for more conditional stats Previously, efx_ef10_stat_mask returned a static const unsigned long[], which meant that each possible mask had to be declared statically with STAT_MASK_BITMAP. Since adding a condition would double the size of the decision tree, we now create the bitmask dynamically. 
To do this, we have two functions efx_ef10_raw_stat_mask, which returns a u64, and efx_ef10_get_stat_mask, which fills in an unsigned long * argument. Signed-off-by: Ben Hutchings diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index a4fbb38..c00a5d6 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -498,44 +498,49 @@ static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = { #define HUNT_40G_EXTRA_STAT_MASK ((1ULL << EF10_STAT_rx_align_error) | \ (1ULL << EF10_STAT_rx_length_error)) -#if BITS_PER_LONG == 64 -#define STAT_MASK_BITMAP(bits) (bits) -#else -#define STAT_MASK_BITMAP(bits) (bits) & 0xffffffff, (bits) >> 32 -#endif - -static const unsigned long *efx_ef10_stat_mask(struct efx_nic *efx) +static u64 efx_ef10_raw_stat_mask(struct efx_nic *efx) { - static const unsigned long hunt_40g_stat_mask[] = { - STAT_MASK_BITMAP(HUNT_COMMON_STAT_MASK | - HUNT_40G_EXTRA_STAT_MASK) - }; - static const unsigned long hunt_10g_only_stat_mask[] = { - STAT_MASK_BITMAP(HUNT_COMMON_STAT_MASK | - HUNT_10G_ONLY_STAT_MASK) - }; + u64 raw_mask = HUNT_COMMON_STAT_MASK; u32 port_caps = efx_mcdi_phy_get_caps(efx); if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) - return hunt_40g_stat_mask; + raw_mask |= HUNT_40G_EXTRA_STAT_MASK; else - return hunt_10g_only_stat_mask; + raw_mask |= HUNT_10G_ONLY_STAT_MASK; + return raw_mask; +} + +static void efx_ef10_get_stat_mask(struct efx_nic *efx, unsigned long *mask) +{ + u64 raw_mask = efx_ef10_raw_stat_mask(efx); + +#if BITS_PER_LONG == 64 + mask[0] = raw_mask; +#else + mask[0] = raw_mask & 0xffffffff; + mask[1] = raw_mask >> 32; +#endif } static size_t efx_ef10_describe_stats(struct efx_nic *efx, u8 *names) { + DECLARE_BITMAP(mask, EF10_STAT_COUNT); + + efx_ef10_get_stat_mask(efx, mask); return efx_nic_describe_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, - efx_ef10_stat_mask(efx), names); + mask, names); } static int efx_ef10_try_update_nic_stats(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; - const unsigned long *stats_mask = efx_ef10_stat_mask(efx); + DECLARE_BITMAP(mask, EF10_STAT_COUNT); __le64 generation_start, generation_end; u64 *stats = nic_data->stats; __le64 *dma_stats; + efx_ef10_get_stat_mask(efx, mask); + dma_stats = efx->stats_buffer.addr; nic_data = efx->nic_data; @@ -543,7 +548,7 @@ static int efx_ef10_try_update_nic_stats(struct efx_nic *efx) if (generation_end == EFX_MC_STATS_GENERATION_INVALID) return 0; rmb(); - efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, stats_mask, + efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, mask, stats, efx->stats_buffer.addr, false); rmb(); generation_start = dma_stats[MC_CMD_MAC_GENERATION_START]; @@ -564,12 +569,14 @@ static int efx_ef10_try_update_nic_stats(struct efx_nic *efx) static size_t efx_ef10_update_stats(struct efx_nic *efx, u64 *full_stats, struct rtnl_link_stats64 *core_stats) { - const unsigned long *mask = efx_ef10_stat_mask(efx); + DECLARE_BITMAP(mask, EF10_STAT_COUNT); struct efx_ef10_nic_data *nic_data = efx->nic_data; u64 *stats = nic_data->stats; size_t stats_count = 0, index; int retry; + efx_ef10_get_stat_mask(efx, mask); + /* If we're unlucky enough to read statistics during the DMA, wait * up to 10ms for it to finish (typically takes <500us) */ -- cgit v0.10.2 From 2ca10a75d897c6cd3cfcf87567551d775c9b10bb Mon Sep 17 00:00:00 2001 From: Matthew Slattery Date: Tue, 10 Sep 2013 19:06:27 +0100 Subject: sfc: Add definitions for new stats counters and capability 
flag Signed-off-by: Ben Hutchings diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h index b5cf624..e0a63dd 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -2574,8 +2574,58 @@ #define MC_CMD_MAC_RX_LANES01_DISP_ERR 0x39 /* enum */ #define MC_CMD_MAC_RX_LANES23_DISP_ERR 0x3a /* enum */ #define MC_CMD_MAC_RX_MATCH_FAULT 0x3b /* enum */ -#define MC_CMD_GMAC_DMABUF_START 0x40 /* enum */ -#define MC_CMD_GMAC_DMABUF_END 0x5f /* enum */ +/* enum: PM trunc_bb_overflow counter. Valid for EF10 with PM_AND_RXDP_COUNTERS + * capability only. + */ +#define MC_CMD_MAC_PM_TRUNC_BB_OVERFLOW 0x3c +/* enum: PM discard_bb_overflow counter. Valid for EF10 with + * PM_AND_RXDP_COUNTERS capability only. + */ +#define MC_CMD_MAC_PM_DISCARD_BB_OVERFLOW 0x3d +/* enum: PM trunc_vfifo_full counter. Valid for EF10 with PM_AND_RXDP_COUNTERS + * capability only. + */ +#define MC_CMD_MAC_PM_TRUNC_VFIFO_FULL 0x3e +/* enum: PM discard_vfifo_full counter. Valid for EF10 with + * PM_AND_RXDP_COUNTERS capability only. + */ +#define MC_CMD_MAC_PM_DISCARD_VFIFO_FULL 0x3f +/* enum: PM trunc_qbb counter. Valid for EF10 with PM_AND_RXDP_COUNTERS + * capability only. + */ +#define MC_CMD_MAC_PM_TRUNC_QBB 0x40 +/* enum: PM discard_qbb counter. Valid for EF10 with PM_AND_RXDP_COUNTERS + * capability only. + */ +#define MC_CMD_MAC_PM_DISCARD_QBB 0x41 +/* enum: PM discard_mapping counter. Valid for EF10 with PM_AND_RXDP_COUNTERS + * capability only. + */ +#define MC_CMD_MAC_PM_DISCARD_MAPPING 0x42 +/* enum: RXDP counter: Number of packets dropped due to the queue being + * disabled. Valid for EF10 with PM_AND_RXDP_COUNTERS capability only. + */ +#define MC_CMD_MAC_RXDP_Q_DISABLED_PKTS 0x43 +/* enum: RXDP counter: Number of packets dropped by the DICPU. Valid for EF10 + * with PM_AND_RXDP_COUNTERS capability only. + */ +#define MC_CMD_MAC_RXDP_DI_DROPPED_PKTS 0x45 +/* enum: RXDP counter: Number of non-host packets. Valid for EF10 with + * PM_AND_RXDP_COUNTERS capability only. + */ +#define MC_CMD_MAC_RXDP_STREAMING_PKTS 0x46 +/* enum: RXDP counter: Number of times an emergency descriptor fetch was + * performed. Valid for EF10 with PM_AND_RXDP_COUNTERS capability only. + */ +#define MC_CMD_MAC_RXDP_EMERGENCY_FETCH_CONDITIONS 0x47 +/* enum: RXDP counter: Number of times the DPCPU waited for an existing + * descriptor fetch. Valid for EF10 with PM_AND_RXDP_COUNTERS capability only. + */ +#define MC_CMD_MAC_RXDP_EMERGENCY_WAIT_CONDITIONS 0x48 +/* enum: Start of GMAC stats buffer space, for Siena only. */ +#define MC_CMD_GMAC_DMABUF_START 0x40 +/* enum: End of GMAC stats buffer space, for Siena only. */ +#define MC_CMD_GMAC_DMABUF_END 0x5f #define MC_CMD_MAC_GENERATION_END 0x60 /* enum */ #define MC_CMD_MAC_NSTATS 0x61 /* enum */ @@ -5065,6 +5115,8 @@ #define MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_WIDTH 1 #define MC_CMD_GET_CAPABILITIES_OUT_MCAST_FILTER_CHAINING_LBN 26 #define MC_CMD_GET_CAPABILITIES_OUT_MCAST_FILTER_CHAINING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_OUT_PM_AND_RXDP_COUNTERS_LBN 27 +#define MC_CMD_GET_CAPABILITIES_OUT_PM_AND_RXDP_COUNTERS_WIDTH 1 /* RxDPCPU firmware id. 
*/ #define MC_CMD_GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID_OFST 4 #define MC_CMD_GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID_LEN 2 -- cgit v0.10.2 From 568d7a001bd5612a280e1026cce7e6e2e16b7687 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 25 Sep 2013 17:32:09 +0100 Subject: sfc: Add PM and RXDP drop counters to ethtool stats Recognise the new Packet Memory and RX Data Path counters. The following counters are added: rx_pm_{trunc,discard}_bb_overflow - burst buffer overflowed. This should not occur if BB correctly configured. rx_pm_{trunc,discard}_vfifo_full - not enough space in packet memory. May indicate RX performance problems. rx_pm_{trunc,discard}_qbb - dropped by 802.1Qbb early discard mechanism. Since Qbb is not supported at present, this should not occur. rx_pm_discard_mapping - 802.1p priority configured to be dropped. This should not occur in normal operation. rx_dp_q_disabled_packets - packet was to be delivered to a queue but queue is disabled. May indicate misconfiguration by the driver. rx_dp_di_dropped_packets - parser-dispatcher indicated that a packet should be dropped. rx_dp_streaming_packets - packet was sent to the RXDP streaming bus, ie. a filter directed the packet to the MCPU. rx_dp_emerg_{fetch,wait} - RX datapath had to wait for descriptors to be loaded. Indicates performance problems but not drops. These are only provided if the MC firmware has the PM_AND_RXDP_COUNTERS capability. Otherwise, mask them out. Signed-off-by: Ben Hutchings diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index c00a5d6..21f9ad6 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -444,6 +444,18 @@ static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = { EF10_DMA_STAT(rx_align_error, RX_ALIGN_ERROR_PKTS), EF10_DMA_STAT(rx_length_error, RX_LENGTH_ERROR_PKTS), EF10_DMA_STAT(rx_nodesc_drops, RX_NODESC_DROPS), + EF10_DMA_STAT(rx_pm_trunc_bb_overflow, PM_TRUNC_BB_OVERFLOW), + EF10_DMA_STAT(rx_pm_discard_bb_overflow, PM_DISCARD_BB_OVERFLOW), + EF10_DMA_STAT(rx_pm_trunc_vfifo_full, PM_TRUNC_VFIFO_FULL), + EF10_DMA_STAT(rx_pm_discard_vfifo_full, PM_DISCARD_VFIFO_FULL), + EF10_DMA_STAT(rx_pm_trunc_qbb, PM_TRUNC_QBB), + EF10_DMA_STAT(rx_pm_discard_qbb, PM_DISCARD_QBB), + EF10_DMA_STAT(rx_pm_discard_mapping, PM_DISCARD_MAPPING), + EF10_DMA_STAT(rx_dp_q_disabled_packets, RXDP_Q_DISABLED_PKTS), + EF10_DMA_STAT(rx_dp_di_dropped_packets, RXDP_DI_DROPPED_PKTS), + EF10_DMA_STAT(rx_dp_streaming_packets, RXDP_STREAMING_PKTS), + EF10_DMA_STAT(rx_dp_emerg_fetch, RXDP_EMERGENCY_FETCH_CONDITIONS), + EF10_DMA_STAT(rx_dp_emerg_wait, RXDP_EMERGENCY_WAIT_CONDITIONS), }; #define HUNT_COMMON_STAT_MASK ((1ULL << EF10_STAT_tx_bytes) | \ @@ -498,15 +510,38 @@ static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = { #define HUNT_40G_EXTRA_STAT_MASK ((1ULL << EF10_STAT_rx_align_error) | \ (1ULL << EF10_STAT_rx_length_error)) +/* These statistics are only provided if the firmware supports the + * capability PM_AND_RXDP_COUNTERS. 
+ */ +#define HUNT_PM_AND_RXDP_STAT_MASK ( \ + (1ULL << EF10_STAT_rx_pm_trunc_bb_overflow) | \ + (1ULL << EF10_STAT_rx_pm_discard_bb_overflow) | \ + (1ULL << EF10_STAT_rx_pm_trunc_vfifo_full) | \ + (1ULL << EF10_STAT_rx_pm_discard_vfifo_full) | \ + (1ULL << EF10_STAT_rx_pm_trunc_qbb) | \ + (1ULL << EF10_STAT_rx_pm_discard_qbb) | \ + (1ULL << EF10_STAT_rx_pm_discard_mapping) | \ + (1ULL << EF10_STAT_rx_dp_q_disabled_packets) | \ + (1ULL << EF10_STAT_rx_dp_di_dropped_packets) | \ + (1ULL << EF10_STAT_rx_dp_streaming_packets) | \ + (1ULL << EF10_STAT_rx_dp_emerg_fetch) | \ + (1ULL << EF10_STAT_rx_dp_emerg_wait)) + static u64 efx_ef10_raw_stat_mask(struct efx_nic *efx) { u64 raw_mask = HUNT_COMMON_STAT_MASK; u32 port_caps = efx_mcdi_phy_get_caps(efx); + struct efx_ef10_nic_data *nic_data = efx->nic_data; if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) raw_mask |= HUNT_40G_EXTRA_STAT_MASK; else raw_mask |= HUNT_10G_ONLY_STAT_MASK; + + if (nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_PM_AND_RXDP_COUNTERS_LBN)) + raw_mask |= HUNT_PM_AND_RXDP_STAT_MASK; + return raw_mask; } diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index fda29d3..890bbbe 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -386,6 +386,18 @@ enum { EF10_STAT_rx_align_error, EF10_STAT_rx_length_error, EF10_STAT_rx_nodesc_drops, + EF10_STAT_rx_pm_trunc_bb_overflow, + EF10_STAT_rx_pm_discard_bb_overflow, + EF10_STAT_rx_pm_trunc_vfifo_full, + EF10_STAT_rx_pm_discard_vfifo_full, + EF10_STAT_rx_pm_trunc_qbb, + EF10_STAT_rx_pm_discard_qbb, + EF10_STAT_rx_pm_discard_mapping, + EF10_STAT_rx_dp_q_disabled_packets, + EF10_STAT_rx_dp_di_dropped_packets, + EF10_STAT_rx_dp_streaming_packets, + EF10_STAT_rx_dp_emerg_fetch, + EF10_STAT_rx_dp_emerg_wait, EF10_STAT_COUNT }; -- cgit v0.10.2 From 3573540cafa4296dd60f8be02f2aecaa31047525 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 2 Oct 2013 09:14:06 +0300 Subject: netif_set_xps_queue: make cpu mask const virtio wants to pass in cpumask_of(cpu), make parameter const to avoid build warnings. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. 
Miller diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3de49ac..25f5d2d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2264,11 +2264,12 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) } #ifdef CONFIG_XPS -extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, +extern int netif_set_xps_queue(struct net_device *dev, + const struct cpumask *mask, u16 index); #else static inline int netif_set_xps_queue(struct net_device *dev, - struct cpumask *mask, + const struct cpumask *mask, u16 index) { return 0; diff --git a/net/core/dev.c b/net/core/dev.c index 65f829c..3430b1e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1917,7 +1917,8 @@ static struct xps_map *expand_xps_map(struct xps_map *map, return new_map; } -int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index) +int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, + u16 index) { struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; struct xps_map *map, *new_map; -- cgit v0.10.2 From 582442d6d5bc74c1e11a2515f9387d3d227278e2 Mon Sep 17 00:00:00 2001 From: Oussama Ghorbel Date: Thu, 3 Oct 2013 14:49:26 +0100 Subject: ipv6: Allow the MTU of ipip6 tunnel to be set below 1280 The (inner) MTU of a ipip6 (IPv4-in-IPv6) tunnel cannot be set below 1280, which is the minimum MTU in IPv6. However, there should be no IPv6 on the tunnel interface at all, so the IPv6 rules should not apply. More info at https://bugzilla.kernel.org/show_bug.cgi?id=15530 This patch allows to check the minimum MTU for ipv6 tunnel according to these rules: -In case the tunnel is configured with ipip6 mode the minimum MTU is 68. -In case the tunnel is configured with ip6ip6 or any mode the minimum MTU is 1280. Signed-off-by: Oussama Ghorbel Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a791552..583b77e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1430,9 +1430,17 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { - if (new_mtu < IPV6_MIN_MTU) { - return -EINVAL; + struct ip6_tnl *tnl = netdev_priv(dev); + + if (tnl->parms.proto == IPPROTO_IPIP) { + if (new_mtu < 68) + return -EINVAL; + } else { + if (new_mtu < IPV6_MIN_MTU) + return -EINVAL; } + if (new_mtu > 0xFFF8 - dev->hard_header_len) + return -EINVAL; dev->mtu = new_mtu; return 0; } -- cgit v0.10.2 From ecb1c9cc215cb5a4390b714d8b09de637f54fa3f Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 7 Oct 2013 20:10:11 +0100 Subject: sfc: Only bind to EF10 functions with the LinkCtrl and Trusted flags Although we do not yet enable multiple PFs per port, it is possible that a board will be reconfigured to enable them while the driver has not yet been updated to fully support this. The most obvious problem is that multiple functions may try to set conflicting link settings. But we will also run into trouble if the firmware doesn't consider us fully trusted. So, abort probing unless both the LinkCtrl and Trusted flags are set for this function. 
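The check reduces to requiring that two specific bits are both present in the firmware's reply. A minimal standalone sketch of that pattern (the flag positions here are invented, not the MCDI bit definitions):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical flag bit positions, standing in for the firmware's
 * LINKCTRL and TRUSTED flags. */
#define FLAG_LINKCTRL	(1u << 5)
#define FLAG_TRUSTED	(1u << 6)

/* Return 0 only if every required bit is present in @flags. */
static int check_required_flags(uint32_t flags)
{
	const uint32_t required = FLAG_LINKCTRL | FLAG_TRUSTED;

	/* Masking and comparing against the full required set is what
	 * distinguishes "both flags set" from "either flag set". */
	if ((flags & required) != required)
		return -1;	/* probe would be aborted, e.g. -ENODEV */
	return 0;
}

int main(void)
{
	printf("%d\n", check_required_flags(FLAG_LINKCTRL));			/* -1 */
	printf("%d\n", check_required_flags(FLAG_LINKCTRL | FLAG_TRUSTED));	/* 0 */
	return 0;
}
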
Signed-off-by: Ben Hutchings diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index c082562..366c8e3 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -963,7 +963,7 @@ static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, bool *was_attached) { MCDI_DECLARE_BUF(inbuf, MC_CMD_DRV_ATTACH_IN_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_DRV_ATTACH_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_DRV_ATTACH_EXT_OUT_LEN); size_t outlen; int rc; @@ -981,6 +981,22 @@ static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, goto fail; } + /* We currently assume we have control of the external link + * and are completely trusted by firmware. Abort probing + * if that's not true for this function. + */ + if (driver_operating && + outlen >= MC_CMD_DRV_ATTACH_EXT_OUT_LEN && + (MCDI_DWORD(outbuf, DRV_ATTACH_EXT_OUT_FUNC_FLAGS) & + (1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_LINKCTRL | + 1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_TRUSTED)) != + (1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_LINKCTRL | + 1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_TRUSTED)) { + netif_err(efx, probe, efx->net_dev, + "This driver version only supports one function per port\n"); + return -ENODEV; + } + if (was_attached != NULL) *was_attached = MCDI_DWORD(outbuf, DRV_ATTACH_OUT_OLD_STATE); return 0; -- cgit v0.10.2 From d45ed4a4e33ae103053c0a53d280014e7101bb5c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 4 Oct 2013 00:14:06 -0700 Subject: net: fix unsafe set_memory_rw from softirq on x86 system with net.core.bpf_jit_enable = 1 sudo tcpdump -i eth1 'tcp port 22' causes the warning: [ 56.766097] Possible unsafe locking scenario: [ 56.766097] [ 56.780146] CPU0 [ 56.786807] ---- [ 56.793188] lock(&(&vb->lock)->rlock); [ 56.799593] [ 56.805889] lock(&(&vb->lock)->rlock); [ 56.812266] [ 56.812266] *** DEADLOCK *** [ 56.812266] [ 56.830670] 1 lock held by ksoftirqd/1/13: [ 56.836838] #0: (rcu_read_lock){.+.+..}, at: [] vm_unmap_aliases+0x8c/0x380 [ 56.849757] [ 56.849757] stack backtrace: [ 56.862194] CPU: 1 PID: 13 Comm: ksoftirqd/1 Not tainted 3.12.0-rc3+ #45 [ 56.868721] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012 [ 56.882004] ffffffff821944c0 ffff88080bbdb8c8 ffffffff8175a145 0000000000000007 [ 56.895630] ffff88080bbd5f40 ffff88080bbdb928 ffffffff81755b14 0000000000000001 [ 56.909313] ffff880800000001 ffff880800000000 ffffffff8101178f 0000000000000001 [ 56.923006] Call Trace: [ 56.929532] [] dump_stack+0x55/0x76 [ 56.936067] [] print_usage_bug+0x1f7/0x208 [ 56.942445] [] ? save_stack_trace+0x2f/0x50 [ 56.948932] [] ? check_usage_backwards+0x150/0x150 [ 56.955470] [] mark_lock+0x282/0x2c0 [ 56.961945] [] __lock_acquire+0x45d/0x1d50 [ 56.968474] [] ? __lock_acquire+0x2de/0x1d50 [ 56.975140] [] ? cpumask_next_and+0x55/0x90 [ 56.981942] [] lock_acquire+0x92/0x1d0 [ 56.988745] [] ? vm_unmap_aliases+0x16a/0x380 [ 56.995619] [] _raw_spin_lock+0x41/0x50 [ 57.002493] [] ? vm_unmap_aliases+0x16a/0x380 [ 57.009447] [] vm_unmap_aliases+0x16a/0x380 [ 57.016477] [] ? vm_unmap_aliases+0x8c/0x380 [ 57.023607] [] change_page_attr_set_clr+0xc0/0x460 [ 57.030818] [] ? trace_hardirqs_on+0xd/0x10 [ 57.037896] [] ? kmem_cache_free+0xb0/0x2b0 [ 57.044789] [] ? 
free_object_rcu+0x93/0xa0 [ 57.051720] [] set_memory_rw+0x2f/0x40 [ 57.058727] [] bpf_jit_free+0x2c/0x40 [ 57.065577] [] sk_filter_release_rcu+0x1a/0x30 [ 57.072338] [] rcu_process_callbacks+0x202/0x7c0 [ 57.078962] [] __do_softirq+0xf7/0x3f0 [ 57.085373] [] run_ksoftirqd+0x35/0x70 cannot reuse jited filter memory, since it's readonly, so use original bpf insns memory to hold work_struct defer kfree of sk_filter until jit completed freeing tested on x86_64 and i386 Signed-off-by: Alexei Starovoitov Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index f50d223..99b44e0 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -930,4 +930,5 @@ void bpf_jit_free(struct sk_filter *fp) { if (fp->bpf_func != sk_run_filter) module_free(NULL, fp->bpf_func); + kfree(fp); } diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index bf56e33..2345bdb 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -691,4 +691,5 @@ void bpf_jit_free(struct sk_filter *fp) { if (fp->bpf_func != sk_run_filter) module_free(NULL, fp->bpf_func); + kfree(fp); } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 7092392..a5df511 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -881,7 +881,9 @@ void bpf_jit_free(struct sk_filter *fp) struct bpf_binary_header *header = (void *)addr; if (fp->bpf_func == sk_run_filter) - return; + goto free_filter; set_memory_rw(addr, header->pages); module_free(NULL, header); +free_filter: + kfree(fp); } diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 9c7be59..218b6b2 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -808,4 +808,5 @@ void bpf_jit_free(struct sk_filter *fp) { if (fp->bpf_func != sk_run_filter) module_free(NULL, fp->bpf_func); + kfree(fp); } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 79c216a..516593e 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -772,13 +772,21 @@ out: return; } +static void bpf_jit_free_deferred(struct work_struct *work) +{ + struct sk_filter *fp = container_of(work, struct sk_filter, work); + unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; + struct bpf_binary_header *header = (void *)addr; + + set_memory_rw(addr, header->pages); + module_free(NULL, header); + kfree(fp); +} + void bpf_jit_free(struct sk_filter *fp) { if (fp->bpf_func != sk_run_filter) { - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *header = (void *)addr; - - set_memory_rw(addr, header->pages); - module_free(NULL, header); + INIT_WORK(&fp->work, bpf_jit_free_deferred); + schedule_work(&fp->work); } } diff --git a/include/linux/filter.h b/include/linux/filter.h index a6ac848..ff4e40c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -6,6 +6,7 @@ #include #include +#include #include #ifdef CONFIG_COMPAT @@ -25,15 +26,19 @@ struct sk_filter { atomic_t refcnt; unsigned int len; /* Number of filter blocks */ + struct rcu_head rcu; unsigned int (*bpf_func)(const struct sk_buff *skb, const struct sock_filter *filter); - struct rcu_head rcu; - struct sock_filter insns[0]; + union { + struct sock_filter insns[0]; + struct work_struct work; + }; }; -static inline unsigned int sk_filter_len(const struct sk_filter *fp) +static inline unsigned int sk_filter_size(unsigned int proglen) { - return 
fp->len * sizeof(struct sock_filter) + sizeof(*fp); + return max(sizeof(struct sk_filter), + offsetof(struct sk_filter, insns[proglen])); } extern int sk_filter(struct sock *sk, struct sk_buff *skb); @@ -67,11 +72,13 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, } #define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns) #else +#include static inline void bpf_jit_compile(struct sk_filter *fp) { } static inline void bpf_jit_free(struct sk_filter *fp) { + kfree(fp); } #define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns) #endif diff --git a/include/net/sock.h b/include/net/sock.h index 1d37a80..808cbc2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1630,16 +1630,14 @@ static inline void sk_filter_release(struct sk_filter *fp) static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) { - unsigned int size = sk_filter_len(fp); - - atomic_sub(size, &sk->sk_omem_alloc); + atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc); sk_filter_release(fp); } static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp) { atomic_inc(&fp->refcnt); - atomic_add(sk_filter_len(fp), &sk->sk_omem_alloc); + atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc); } /* diff --git a/net/core/filter.c b/net/core/filter.c index 6438f29..01b7808 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -644,7 +644,6 @@ void sk_filter_release_rcu(struct rcu_head *rcu) struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); bpf_jit_free(fp); - kfree(fp); } EXPORT_SYMBOL(sk_filter_release_rcu); @@ -683,7 +682,7 @@ int sk_unattached_filter_create(struct sk_filter **pfp, if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL); + fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); if (!fp) return -ENOMEM; memcpy(fp->insns, fprog->filter, fsize); @@ -723,6 +722,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct sk_filter *fp, *old_fp; unsigned int fsize = sizeof(struct sock_filter) * fprog->len; + unsigned int sk_fsize = sk_filter_size(fprog->len); int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) @@ -732,11 +732,11 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (fprog->filter == NULL) return -EINVAL; - fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL); + fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL); if (!fp) return -ENOMEM; if (copy_from_user(fp->insns, fprog->filter, fsize)) { - sock_kfree_s(sk, fp, fsize+sizeof(*fp)); + sock_kfree_s(sk, fp, sk_fsize); return -EFAULT; } -- cgit v0.10.2 From f468b10e3de6c5f90a14b9c82f1403a2651cef35 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Fri, 4 Oct 2013 14:44:39 +0200 Subject: net: ethernet: cpsw: Search childs for slave nodes The current implementation searches the whole DT for nodes named "slave". This patch changes it to search only child nodes for slaves. Signed-off-by: Markus Pargmann Acked-by: Mugunthan V N Acked-by: Peter Korsgaard Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 79974e3..5df50a9 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1782,7 +1782,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, if (ret) pr_warn("Doesn't have any child node\n"); - for_each_node_by_name(slave_node, "slave") { + for_each_child_of_node(node, slave_node) { struct cpsw_slave_data *slave_data = data->slave_data + i; const void *mac_addr = NULL; u32 phyid; @@ -1791,6 +1791,10 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, struct device_node *mdio_node; struct platform_device *mdio; + /* This is no slave child node, continue */ + if (strcmp(slave_node->name, "slave")) + continue; + parp = of_get_property(slave_node, "phy_id", &lenp); if ((parp == NULL) || (lenp != (sizeof(void *) * 2))) { pr_err("Missing slave[%d] phy_id property\n", i); -- cgit v0.10.2 From 281abd965fedfe7c31927d84e01a5bfa554b5502 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Fri, 4 Oct 2013 14:44:40 +0200 Subject: net/ethernet: cpsw: DT read bool dual_emac Signed-off-by: Markus Pargmann Acked-by: Mugunthan V N Acked-by: Peter Korsgaard Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 5df50a9..804846e 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1771,8 +1771,8 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } data->mac_control = prop; - if (!of_property_read_u32(node, "dual_emac", &prop)) - data->dual_emac = prop; + if (of_property_read_bool(node, "dual_emac")) + data->dual_emac = 1; /* * Populate all the child nodes here... -- cgit v0.10.2 From 0a7e22609067ff524fc7bbd45c6951dd08561667 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 4 Oct 2013 17:04:48 +0200 Subject: ipv4: fix ineffective source address selection When sending out multicast messages, the source address in inet->mc_addr is ignored and rewritten by an autoselected one. This is caused by a typo in commit 813b3b5db831 ("ipv4: Use caller's on-stack flowi as-is in output route lookups"). Signed-off-by: Jiri Benc Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 727f436..6011615 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2072,7 +2072,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) RT_SCOPE_LINK); goto make_route; } - if (fl4->saddr) { + if (!fl4->saddr) { if (ipv4_is_multicast(fl4->daddr)) fl4->saddr = inet_select_addr(dev_out, 0, fl4->flowi4_scope); -- cgit v0.10.2 From fe119a05f8ca481623a8d02efcc984332e612528 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 5 Oct 2013 21:25:17 +0200 Subject: can: dev: fix nlmsg size calculation in can_get_size() This patch fixes the calculation of the nlmsg size, by adding the missing nla_total_size(). Signed-off-by: Marc Kleine-Budde Signed-off-by: David S. 
Miller diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index f9cba41..1870c47 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -705,14 +705,14 @@ static size_t can_get_size(const struct net_device *dev) size_t size; size = nla_total_size(sizeof(u32)); /* IFLA_CAN_STATE */ - size += sizeof(struct can_ctrlmode); /* IFLA_CAN_CTRLMODE */ + size += nla_total_size(sizeof(struct can_ctrlmode)); /* IFLA_CAN_CTRLMODE */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_RESTART_MS */ - size += sizeof(struct can_bittiming); /* IFLA_CAN_BITTIMING */ - size += sizeof(struct can_clock); /* IFLA_CAN_CLOCK */ + size += nla_total_size(sizeof(struct can_bittiming)); /* IFLA_CAN_BITTIMING */ + size += nla_total_size(sizeof(struct can_clock)); /* IFLA_CAN_CLOCK */ if (priv->do_get_berr_counter) /* IFLA_CAN_BERR_COUNTER */ - size += sizeof(struct can_berr_counter); + size += nla_total_size(sizeof(struct can_berr_counter)); if (priv->bittiming_const) /* IFLA_CAN_BITTIMING_CONST */ - size += sizeof(struct can_bittiming_const); + size += nla_total_size(sizeof(struct can_bittiming_const)); return size; } -- cgit v0.10.2 From 88ba09df236c618b5466d35a0165df0dc865eac5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 5 Oct 2013 13:15:30 -0700 Subject: net: Update the sysctl permissions handler to test effective uid/gid On Tue, 20 Aug 2013 11:40:04 -0500 Eric Sandeen wrote: > This was brought up in a Red Hat bug (which may be marked private, I'm sorry): > > Bug 987055 - open O_WRONLY succeeds on some root owned files in /proc for process running with unprivileged EUID > > "On RHEL7 some of the files in /proc can be opened for writing by an unprivileged EUID." > > The flaw existed upstream as well last I checked. > > This commit in kernel v3.8 caused the regression: > > commit cff109768b2d9c03095848f4cd4b0754117262aa > Author: Eric W. Biederman > Date: Fri Nov 16 03:03:01 2012 +0000 > > net: Update the per network namespace sysctls to be available to the network namespace owner > > - Allow anyone with CAP_NET_ADMIN rights in the user namespace of the > the netowrk namespace to change sysctls. > - Allow anyone the uid of the user namespace root the same > permissions over the network namespace sysctls as the global root. > - Allow anyone with gid of the user namespace root group the same > permissions over the network namespace sysctl as the global root group. > > Signed-off-by: "Eric W. Biederman" > Signed-off-by: David S. Miller > > because it changed /sys/net's special permission handler to test current_uid, not > current_euid; same for current_gid/current_egid. > > So in this case, root cannot drop privs via set[ug]id, and retains all privs > in this codepath. Modify the code to use current_euid(), and in_egroup_p, as in done in fs/proc/proc_sysctl.c:test_perm() Cc: stable@vger.kernel.org Reviewed-by: Eric Sandeen Reported-by: Eric Sandeen Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 9bc6db0..e7000be 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -47,12 +47,12 @@ static int net_ctl_permissions(struct ctl_table_header *head, /* Allow network administrator to have same access as root. 
*/ if (ns_capable(net->user_ns, CAP_NET_ADMIN) || - uid_eq(root_uid, current_uid())) { + uid_eq(root_uid, current_euid())) { int mode = (table->mode >> 6) & 7; return (mode << 6) | (mode << 3) | mode; } /* Allow netns root group to have the same access as the root group */ - if (gid_eq(root_gid, current_gid())) { + if (in_egroup_p(root_gid)) { int mode = (table->mode >> 3) & 7; return (mode << 3) | mode; } -- cgit v0.10.2 From 7adac1ec8198e9f5c802918e58a967b92db4a66f Mon Sep 17 00:00:00 2001 From: Alan Ott Date: Sat, 5 Oct 2013 23:15:18 -0400 Subject: 6lowpan: Only make 6lowpan links to IEEE802154 devices Refuse to create 6lowpan links if the actual hardware interface is of any type other than ARPHRD_IEEE802154. Signed-off-by: Alan Ott Suggested-by: Alexander Aring Signed-off-by: David S. Miller diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index c85e71e..8f56b2b 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1372,6 +1372,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; + if (real_dev->type != ARPHRD_IEEE802154) + return -EINVAL; lowpan_dev_info(dev)->real_dev = real_dev; lowpan_dev_info(dev)->fragment_tag = 0; -- cgit v0.10.2 From ab2d95df9c19f45805977be1c869ba3df9a10835 Mon Sep 17 00:00:00 2001 From: Alan Ott Date: Sat, 5 Oct 2013 23:15:19 -0400 Subject: 6lowpan: Sync default hardware address of lowpan links to their wpan When a lowpan link to a wpan device is created, set the hardware address of the lowpan link to that of the wpan device. Signed-off-by: Alan Ott Signed-off-by: David S. Miller diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 8f56b2b..ff41b4d 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1388,6 +1388,9 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, entry->ldev = dev; + /* Set the lowpan harware address to the wpan hardware address. */ + memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); INIT_LIST_HEAD(&entry->list); list_add_tail(&entry->list, &lowpan_devices); -- cgit v0.10.2 From 9757f1d2e0b4e7ff59e76c2fadbf819e18f3f884 Mon Sep 17 00:00:00 2001 From: Alan Ott Date: Sat, 5 Oct 2013 23:52:22 -0400 Subject: mrf24j40: Move INIT_COMPLETION() to before packet transmission This avoids a race condition where complete(tx_complete) could be called before tx_complete is initialized. Signed-off-by: Alan Ott Signed-off-by: David S. Miller diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c index 42e6dee..66bb4ce 100644 --- a/drivers/net/ieee802154/mrf24j40.c +++ b/drivers/net/ieee802154/mrf24j40.c @@ -344,6 +344,8 @@ static int mrf24j40_tx(struct ieee802154_dev *dev, struct sk_buff *skb) if (ret) goto err; + INIT_COMPLETION(devrec->tx_complete); + /* Set TXNTRIG bit of TXNCON to send packet */ ret = read_short_reg(devrec, REG_TXNCON, &val); if (ret) @@ -354,8 +356,6 @@ static int mrf24j40_tx(struct ieee802154_dev *dev, struct sk_buff *skb) val |= 0x4; write_short_reg(devrec, REG_TXNCON, val); - INIT_COMPLETION(devrec->tx_complete); - /* Wait for the device to send the TX complete interrupt. 
*/ ret = wait_for_completion_interruptible_timeout( &devrec->tx_complete, -- cgit v0.10.2 From 4a4e1da83cc1ff1b8a5171d9b16d4a6c2a6936db Mon Sep 17 00:00:00 2001 From: Alan Ott Date: Sat, 5 Oct 2013 23:52:23 -0400 Subject: mrf24j40: Use threaded IRQ handler Eliminate all the workqueue and interrupt enable/disable. Signed-off-by: Alan Ott Signed-off-by: David S. Miller diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c index 66bb4ce..c1bc688 100644 --- a/drivers/net/ieee802154/mrf24j40.c +++ b/drivers/net/ieee802154/mrf24j40.c @@ -82,7 +82,6 @@ struct mrf24j40 { struct mutex buffer_mutex; /* only used to protect buf */ struct completion tx_complete; - struct work_struct irqwork; u8 *buf; /* 3 bytes. Used for SPI single-register transfers. */ }; @@ -590,17 +589,6 @@ static struct ieee802154_ops mrf24j40_ops = { static irqreturn_t mrf24j40_isr(int irq, void *data) { struct mrf24j40 *devrec = data; - - disable_irq_nosync(irq); - - schedule_work(&devrec->irqwork); - - return IRQ_HANDLED; -} - -static void mrf24j40_isrwork(struct work_struct *work) -{ - struct mrf24j40 *devrec = container_of(work, struct mrf24j40, irqwork); u8 intstat; int ret; @@ -618,7 +606,7 @@ static void mrf24j40_isrwork(struct work_struct *work) mrf24j40_handle_rx(devrec); out: - enable_irq(devrec->spi->irq); + return IRQ_HANDLED; } static int mrf24j40_probe(struct spi_device *spi) @@ -642,7 +630,6 @@ static int mrf24j40_probe(struct spi_device *spi) mutex_init(&devrec->buffer_mutex); init_completion(&devrec->tx_complete); - INIT_WORK(&devrec->irqwork, mrf24j40_isrwork); devrec->spi = spi; spi_set_drvdata(spi, devrec); @@ -688,11 +675,12 @@ static int mrf24j40_probe(struct spi_device *spi) val &= ~0x3; /* Clear RX mode (normal) */ write_short_reg(devrec, REG_RXMCR, val); - ret = request_irq(spi->irq, - mrf24j40_isr, - IRQF_TRIGGER_FALLING, - dev_name(&spi->dev), - devrec); + ret = request_threaded_irq(spi->irq, + NULL, + mrf24j40_isr, + IRQF_TRIGGER_FALLING|IRQF_ONESHOT, + dev_name(&spi->dev), + devrec); if (ret) { dev_err(printdev(devrec), "Unable to get IRQ"); @@ -721,7 +709,6 @@ static int mrf24j40_remove(struct spi_device *spi) dev_dbg(printdev(devrec), "remove\n"); free_irq(spi->irq, devrec); - flush_work(&devrec->irqwork); /* TODO: Is this the right call? */ ieee802154_unregister_device(devrec->dev); ieee802154_free_device(devrec->dev); /* TODO: Will ieee802154_free_device() wait until ->xmit() is -- cgit v0.10.2 From 40afbb657352e92726c40cc4ddf3cf0be6800dba Mon Sep 17 00:00:00 2001 From: Alan Ott Date: Sat, 5 Oct 2013 23:52:24 -0400 Subject: mrf24j40: Use level-triggered interrupts The mrf24j40 generates level interrupts. There are rare cases where it appears that the interrupt line never gets de-asserted between interrupts, causing interrupts to be lost, and causing a hung device from the driver's perspective. Switching the driver to interpret these interrupts as level-triggered fixes this issue. Signed-off-by: Alan Ott Signed-off-by: David S. Miller diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c index c1bc688..0632d34 100644 --- a/drivers/net/ieee802154/mrf24j40.c +++ b/drivers/net/ieee802154/mrf24j40.c @@ -678,7 +678,7 @@ static int mrf24j40_probe(struct spi_device *spi) ret = request_threaded_irq(spi->irq, NULL, mrf24j40_isr, - IRQF_TRIGGER_FALLING|IRQF_ONESHOT, + IRQF_TRIGGER_LOW|IRQF_ONESHOT, dev_name(&spi->dev), devrec); -- cgit v0.10.2 From 5c0c52c9102f90c9d5674fb6c5f4f23e3019dd9f Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Sun, 6 Oct 2013 21:25:12 +0300 Subject: tun: don't look at current when non-blocking We play with a wait queue even if socket is non blocking. This is an obvious waste. Besides, it will prevent calling the non blocking variant when current is not valid. Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 807815f..7cb105c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1293,7 +1293,8 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, if (unlikely(!noblock)) add_wait_queue(&tfile->wq.wait, &wait); while (len) { - current->state = TASK_INTERRUPTIBLE; + if (unlikely(!noblock)) + current->state = TASK_INTERRUPTIBLE; /* Read frames from the queue */ if (!(skb = skb_dequeue(&tfile->socket.sk->sk_receive_queue))) { @@ -1320,9 +1321,10 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, break; } - current->state = TASK_RUNNING; - if (unlikely(!noblock)) + if (unlikely(!noblock)) { + current->state = TASK_RUNNING; remove_wait_queue(&tfile->wq.wait, &wait); + } return ret; } -- cgit v0.10.2 From 8d8a51e26a6d415e1470759f2cf5f3ee3ee86196 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Oct 2013 15:44:26 -0400 Subject: l2tp: Fix build warning with ipv6 disabled. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/l2tp/l2tp_core.c: In function ‘l2tp_verify_udp_checksum’: net/l2tp/l2tp_core.c:499:22: warning: unused variable ‘tunnel’ [-Wunused-variable] Create a helper "l2tp_tunnel()" to facilitate this, and as a side effect get rid of a bunch of unnecessary void pointer casts. Signed-off-by: David S. Miller diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index aedaa2c..b076e83 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -115,6 +115,11 @@ struct l2tp_net { static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); +static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) +{ + return sk->sk_user_data; +} + static inline struct l2tp_net *l2tp_pernet(struct net *net) { BUG_ON(!net); @@ -496,7 +501,6 @@ out: static inline int l2tp_verify_udp_checksum(struct sock *sk, struct sk_buff *skb) { - struct l2tp_tunnel *tunnel = (struct l2tp_tunnel *)sk->sk_user_data; struct udphdr *uh = udp_hdr(skb); u16 ulen = ntohs(uh->len); __wsum psum; @@ -505,7 +509,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, return 0; #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) { + if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) { if (!uh->check) { LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); return 1; @@ -1305,10 +1309,9 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); */ static void l2tp_tunnel_destruct(struct sock *sk) { - struct l2tp_tunnel *tunnel; + struct l2tp_tunnel *tunnel = l2tp_tunnel(sk); struct l2tp_net *pn; - tunnel = sk->sk_user_data; if (tunnel == NULL) goto end; @@ -1676,7 +1679,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 } /* Check if this socket has already been prepped */ - tunnel = (struct l2tp_tunnel *)sk->sk_user_data; + tunnel = l2tp_tunnel(sk); if (tunnel != NULL) { /* This socket has already been prepped */ err = -EBUSY; -- cgit v0.10.2 From 2c6221e4a5aab417cb18bef6f1130d1374240258 Mon Sep 17 00:00:00 2001 From: Nguyen Hong Ky Date: Mon, 7 Oct 2013 15:29:25 +0900 Subject: 
net: sh_eth: Fix RX packets errors on R8A7740 This patch will fix RX packets errors when receiving big size of data by set bit RNC = 1. RNC - Receive Enable Control 0: Upon completion of reception of one frame, the E-DMAC writes the receive status to the descriptor and clears the RR bit in EDRRR to 0. 1: Upon completion of reception of one frame, the E-DMAC writes (writes back) the receive status to the descriptor. In addition, the E-DMAC reads the next descriptor and prepares for reception of the next frame. In addition, for get more stable when receiving packets, I set maximum size for the transmit/receive FIFO and inserts padding in receive data. Signed-off-by: Nguyen Hong Ky Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 5cd831e..c7e1b8f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -620,12 +620,16 @@ static struct sh_eth_cpu_data sh7734_data = { .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT | EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE | EESR_TDE | EESR_ECI, + .fdr_value = 0x0000070f, + .rmcr_value = 0x00000001, .apr = 1, .mpr = 1, .tpauser = 1, .bculr = 1, .hw_swap = 1, + .rpadir = 1, + .rpadir_value = 2 << 16, .no_trimd = 1, .no_ade = 1, .tsu = 1, -- cgit v0.10.2 From 70fbe0794393829d9acd686428d87c27b6f6984b Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Mon, 7 Oct 2013 13:38:12 +0200 Subject: net/mlx4_en: Rename name of mlx4_en_rx_alloc members Add page prefix to page related members: @size and @offset into @page_size and @page_offset CC: Eric Dumazet Signed-off-by: Amir Vadai Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index dec455c..066fc27 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -70,14 +70,15 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, put_page(page); return -ENOMEM; } - page_alloc->size = PAGE_SIZE << order; + page_alloc->page_size = PAGE_SIZE << order; page_alloc->page = page; page_alloc->dma = dma; - page_alloc->offset = frag_info->frag_align; + page_alloc->page_offset = frag_info->frag_align; /* Not doing get_page() for each frag is a big win * on asymetric workloads. 
*/ - atomic_set(&page->_count, page_alloc->size / frag_info->frag_stride); + atomic_set(&page->_count, + page_alloc->page_size / frag_info->frag_stride); return 0; } @@ -96,16 +97,19 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, for (i = 0; i < priv->num_frags; i++) { frag_info = &priv->frag_info[i]; page_alloc[i] = ring_alloc[i]; - page_alloc[i].offset += frag_info->frag_stride; - if (page_alloc[i].offset + frag_info->frag_stride <= ring_alloc[i].size) + page_alloc[i].page_offset += frag_info->frag_stride; + + if (page_alloc[i].page_offset + frag_info->frag_stride <= + ring_alloc[i].page_size) continue; + if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp)) goto out; } for (i = 0; i < priv->num_frags; i++) { frags[i] = ring_alloc[i]; - dma = ring_alloc[i].dma + ring_alloc[i].offset; + dma = ring_alloc[i].dma + ring_alloc[i].page_offset; ring_alloc[i] = page_alloc[i]; rx_desc->data[i].addr = cpu_to_be64(dma); } @@ -117,7 +121,7 @@ out: frag_info = &priv->frag_info[i]; if (page_alloc[i].page != ring_alloc[i].page) { dma_unmap_page(priv->ddev, page_alloc[i].dma, - page_alloc[i].size, PCI_DMA_FROMDEVICE); + page_alloc[i].page_size, PCI_DMA_FROMDEVICE); page = page_alloc[i].page; atomic_set(&page->_count, 1); put_page(page); @@ -132,9 +136,10 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, { const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; - if (frags[i].offset + frag_info->frag_stride > frags[i].size) - dma_unmap_page(priv->ddev, frags[i].dma, frags[i].size, - PCI_DMA_FROMDEVICE); + if (frags[i].page_offset + frag_info->frag_stride > + frags[i].page_size) + dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, + PCI_DMA_FROMDEVICE); if (frags[i].page) put_page(frags[i].page); @@ -161,7 +166,7 @@ out: page_alloc = &ring->page_alloc[i]; dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->size, PCI_DMA_FROMDEVICE); + page_alloc->page_size, PCI_DMA_FROMDEVICE); page = page_alloc->page; atomic_set(&page->_count, 1); put_page(page); @@ -184,10 +189,11 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, i, page_count(page_alloc->page)); dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->size, PCI_DMA_FROMDEVICE); - while (page_alloc->offset + frag_info->frag_stride < page_alloc->size) { + page_alloc->page_size, PCI_DMA_FROMDEVICE); + while (page_alloc->page_offset + frag_info->frag_stride < + page_alloc->page_size) { put_page(page_alloc->page); - page_alloc->offset += frag_info->frag_stride; + page_alloc->page_offset += frag_info->frag_stride; } page_alloc->page = NULL; } @@ -478,7 +484,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, /* Save page reference in skb */ __skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page); skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size); - skb_frags_rx[nr].page_offset = frags[nr].offset; + skb_frags_rx[nr].page_offset = frags[nr].page_offset; skb->truesize += frag_info->frag_stride; frags[nr].page = NULL; } @@ -517,7 +523,7 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, /* Get pointer to first fragment so we could copy the headers into the * (linear part of the) skb */ - va = page_address(frags[0].page) + frags[0].offset; + va = page_address(frags[0].page) + frags[0].page_offset; if (length <= SMALL_PACKET_SIZE) { /* We are copying all relevant data to the skb - temporarily @@ -645,7 +651,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh), 
DMA_FROM_DEVICE); ethh = (struct ethhdr *)(page_address(frags[0].page) + - frags[0].offset); + frags[0].page_offset); if (is_multicast_ether_addr(ethh->h_dest)) { struct mlx4_mac_entry *entry; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 5e0aa56..bf06e36 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -237,8 +237,8 @@ struct mlx4_en_tx_desc { struct mlx4_en_rx_alloc { struct page *page; dma_addr_t dma; - u32 offset; - u32 size; + u32 page_offset; + u32 page_size; }; struct mlx4_en_tx_ring { -- cgit v0.10.2 From 021f1107ffdae7a82af6c53f4c52654062e365c6 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Mon, 7 Oct 2013 13:38:13 +0200 Subject: net/mlx4_en: Fix pages never dma unmapped on rx This patch fixes a bug introduced by commit 51151a16 (mlx4: allow order-0 memory allocations in RX path). dma_unmap_page never reached because condition to detect last fragment in page is wrong. offset+frag_stride can't be greater than size, need to make sure no additional frag will fit in page => compare offset + frag_stride + next_frag_size instead. next_frag_size is the same as the current one, since page is shared only with frags of the same size. CC: Eric Dumazet Signed-off-by: Amir Vadai Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 066fc27..afe2efa 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -135,9 +135,10 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, int i) { const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; + u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride; - if (frags[i].page_offset + frag_info->frag_stride > - frags[i].page_size) + + if (next_frag_end > frags[i].page_size) dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, PCI_DMA_FROMDEVICE); -- cgit v0.10.2 From dc62ccaccfb139d9b04bbc5a2688a4402adbfab3 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 7 Oct 2013 13:55:19 +0100 Subject: xen-netback: transition to CLOSED when removing a VIF If a guest is destroyed without transitioning its frontend to CLOSED, the domain becomes a zombie as netback was not grant unmapping the shared rings. When removing a VIF, transition the backend to CLOSED so the VIF is disconnected if necessary (which will unmap the shared rings etc). This fixes a regression introduced by 279f438e36c0a70b23b86d2090aeec50155034a9 (xen-netback: Don't destroy the netdev until the vif is shut down). Signed-off-by: David Vrabel Cc: Ian Campbell Cc: Wei Liu Cc: Paul Durrant Acked-by: Wei Liu Reviewed-by: Paul Durrant Signed-off-by: David S. 
Miller diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index b45bce2..1b08d87 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -39,11 +39,15 @@ static int connect_rings(struct backend_info *); static void connect(struct backend_info *); static void backend_create_xenvif(struct backend_info *be); static void unregister_hotplug_status_watch(struct backend_info *be); +static void set_backend_state(struct backend_info *be, + enum xenbus_state state); static int netback_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(&dev->dev); + set_backend_state(be, XenbusStateClosed); + unregister_hotplug_status_watch(be); if (be->vif) { kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); -- cgit v0.10.2 From 0e719e3a53cc03bc1bafbb2cae1ddf99a0d73ab0 Mon Sep 17 00:00:00 2001 From: Oussama Ghorbel Date: Mon, 7 Oct 2013 18:50:05 +0100 Subject: ipv6: Fix the upper MTU limit in GRE tunnel Unlike ipv4, the struct member hlen holds the length of the GRE and ipv6 headers. This length is also counted in dev->hard_header_len. Perhaps, it's more clean to modify the hlen to count only the GRE header without ipv6 header as the variable name suggest, but the simple way to fix this without regression risk is simply modify the calculation of the limit in ip6gre_tunnel_change_mtu function. Verified in kernel version v3.11. Signed-off-by: Oussama Ghorbel Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 7bb5446..1ef1fa2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1173,9 +1173,8 @@ done: static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu) { - struct ip6_tnl *tunnel = netdev_priv(dev); if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) + new_mtu > 0xFFF8 - dev->hard_header_len) return -EINVAL; dev->mtu = new_mtu; return 0; -- cgit v0.10.2 From ede869cd0f45488195d56267d0c09ed511611d66 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Oct 2013 12:50:18 -0700 Subject: pkt_sched: fq: fix typo for initial_quantum TCA_FQ_INITIAL_QUANTUM should set q->initial_quantum Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index a2fef8b..48501a2 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -656,7 +656,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); if (tb[TCA_FQ_INITIAL_QUANTUM]) - q->quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); + q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); if (tb[TCA_FQ_FLOW_DEFAULT_RATE]) q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]); -- cgit v0.10.2 From c33a39c575068c2ea9bffb22fd6de2df19c74b89 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 7 Oct 2013 23:19:58 +0200 Subject: net: vlan: fix nlmsg size calculation in vlan_get_size() This patch fixes the calculation of the nlmsg size, by adding the missing nla_total_size(). Cc: Patrick McHardy Signed-off-by: Marc Kleine-Budde Signed-off-by: David S. 
Miller diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 3091297..c7e634a 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -171,7 +171,7 @@ static size_t vlan_get_size(const struct net_device *dev) return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */ nla_total_size(2) + /* IFLA_VLAN_ID */ - sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */ + nla_total_size(sizeof(struct ifla_vlan_flags)) + /* IFLA_VLAN_FLAGS */ vlan_qos_map_size(vlan->nr_ingress_mappings) + vlan_qos_map_size(vlan->nr_egress_mappings); } -- cgit v0.10.2 From bdfd6304c8e2439fc047fafccf05f145004260bd Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 8 Oct 2013 11:19:19 +0800 Subject: moxa: fix the error handling in moxart_mac_probe() This patch fix the error handling in moxart_mac_probe(): - return -ENOMEM in some memory alloc fail cases - add missing free_netdev() in the error handling case Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index bd1a2d2..ea54d95 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -448,7 +448,8 @@ static int moxart_mac_probe(struct platform_device *pdev) irq = irq_of_parse_and_map(node, 0); if (irq <= 0) { netdev_err(ndev, "irq_of_parse_and_map failed\n"); - return -EINVAL; + ret = -EINVAL; + goto irq_map_fail; } priv = netdev_priv(ndev); @@ -472,24 +473,32 @@ static int moxart_mac_probe(struct platform_device *pdev) priv->tx_desc_base = dma_alloc_coherent(NULL, TX_REG_DESC_SIZE * TX_DESC_NUM, &priv->tx_base, GFP_DMA | GFP_KERNEL); - if (priv->tx_desc_base == NULL) + if (priv->tx_desc_base == NULL) { + ret = -ENOMEM; goto init_fail; + } priv->rx_desc_base = dma_alloc_coherent(NULL, RX_REG_DESC_SIZE * RX_DESC_NUM, &priv->rx_base, GFP_DMA | GFP_KERNEL); - if (priv->rx_desc_base == NULL) + if (priv->rx_desc_base == NULL) { + ret = -ENOMEM; goto init_fail; + } priv->tx_buf_base = kmalloc(priv->tx_buf_size * TX_DESC_NUM, GFP_ATOMIC); - if (!priv->tx_buf_base) + if (!priv->tx_buf_base) { + ret = -ENOMEM; goto init_fail; + } priv->rx_buf_base = kmalloc(priv->rx_buf_size * RX_DESC_NUM, GFP_ATOMIC); - if (!priv->rx_buf_base) + if (!priv->rx_buf_base) { + ret = -ENOMEM; goto init_fail; + } platform_set_drvdata(pdev, ndev); @@ -522,7 +531,8 @@ static int moxart_mac_probe(struct platform_device *pdev) init_fail: netdev_err(ndev, "init failed\n"); moxart_mac_free_memory(ndev); - +irq_map_fail: + free_netdev(ndev); return ret; } -- cgit v0.10.2 From 2b1f18a4d6ae8057a93e736a34cdcca925279403 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 8 Oct 2013 11:32:17 +0800 Subject: qlcnic: add missing destroy_workqueue() on error path in qlcnic_probe() Add the missing destroy_workqueue() before return from qlcnic_probe() in the error handling case. Signed-off-by: Wei Yongjun Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 21d00a0..f07f2b0 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2257,7 +2257,7 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = qlcnic_alloc_adapter_resources(adapter); if (err) - goto err_out_free_netdev; + goto err_out_free_wq; adapter->dev_rst_time = jiffies; adapter->ahw->revision_id = pdev->revision; @@ -2396,6 +2396,9 @@ err_out_disable_msi: err_out_free_hw: qlcnic_free_adapter_resources(adapter); +err_out_free_wq: + destroy_workqueue(adapter->qlcnic_wq); + err_out_free_netdev: free_netdev(netdev); -- cgit v0.10.2 From 7eec4174ff29cd42f2acfae8112f51c228545d40 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Oct 2013 15:16:00 -0700 Subject: pkt_sched: fq: fix non TCP flows pacing Steinar reported FQ pacing was not working for UDP flows. It looks like the initial sk->sk_pacing_rate value of 0 was a wrong choice. We should init it to ~0U (unlimited) Then, TCA_FQ_FLOW_DEFAULT_RATE should be removed because it makes no real sense. The default rate is really unlimited, and we need to avoid a zero divide. Reported-by: Steinar H. Gunderson Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/core/sock.c b/net/core/sock.c index 5b6beba..0b39e7a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2319,6 +2319,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_ll_usec = sysctl_net_busy_read; #endif + sk->sk_pacing_rate = ~0U; /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 48501a2..a9dfdda 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -472,20 +472,16 @@ begin: if (f->credit > 0 || !q->rate_enable) goto out; - if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) { - rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate; + rate = q->flow_max_rate; + if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) + rate = min(skb->sk->sk_pacing_rate, rate); - rate = min(rate, q->flow_max_rate); - } else { - rate = q->flow_max_rate; - if (rate == ~0U) - goto out; - } - if (rate) { + if (rate != ~0U) { u32 plen = max(qdisc_pkt_len(skb), q->quantum); u64 len = (u64)plen * NSEC_PER_SEC; - do_div(len, rate); + if (likely(rate)) + do_div(len, rate); /* Since socket rate can change later, * clamp the delay to 125 ms. * TODO: maybe segment the too big skb, as in commit @@ -735,12 +731,14 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) if (opts == NULL) goto nla_put_failure; + /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore, + * do not bother giving its value + */ if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) || nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) || nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) || nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) || nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) || - nla_put_u32(skb, TCA_FQ_FLOW_DEFAULT_RATE, q->flow_default_rate) || nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; -- cgit v0.10.2
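Taken together, the FQ changes above amount to: treat a pacing rate of ~0U (the new socket default) as unlimited, never divide by zero, and clamp the computed inter-packet delay. A simplified userspace model of that computation, assuming the rate is expressed in bytes per second (this is a sketch of the idea, not the qdisc code itself):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL
#define MAX_DELAY_NS	(125ULL * 1000 * 1000)	/* clamp at 125 ms, as sch_fq does */
#define RATE_UNLIMITED	(~0u)

/* Rough model of the per-packet delay FQ schedules when pacing:
 * delay = packet_len / rate, in nanoseconds, clamped.  A rate of 0 or
 * ~0U means "do not pace". */
static uint64_t pacing_delay_ns(uint32_t plen, uint32_t rate)
{
	uint64_t len;

	if (rate == RATE_UNLIMITED || rate == 0)
		return 0;			/* unlimited: no extra delay */

	len = (uint64_t)plen * NSEC_PER_SEC;
	len /= rate;
	if (len > MAX_DELAY_NS)
		len = MAX_DELAY_NS;		/* socket rate may change later */
	return len;
}

int main(void)
{
	/* 1500-byte packet at 1 MB/s -> 1.5 ms between packets. */
	printf("%llu ns\n", (unsigned long long)pacing_delay_ns(1500, 1000000));
	/* Unlimited rate (the new default sk_pacing_rate) -> no pacing. */
	printf("%llu ns\n", (unsigned long long)pacing_delay_ns(1500, RATE_UNLIMITED));
	return 0;
}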