From 1dace8c801ac531022bd31a7316a6b4351837617 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Thu, 4 Mar 2010 16:10:14 -0500 Subject: vhost: fix error path in vhost_net_set_backend An error could cause vhost_net_set_backend to exit without unlocking vq->mutex. Fix this. Signed-off-by: Jeff Dike Signed-off-by: Michael S. Tsirkin diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index ad37da2..fcafb6b 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -508,12 +508,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) /* Verify that ring has been setup correctly. */ if (!vhost_vq_access_ok(vq)) { r = -EFAULT; - goto err; + goto err_vq; } sock = get_socket(fd); if (IS_ERR(sock)) { r = PTR_ERR(sock); - goto err; + goto err_vq; } /* start polling new socket */ @@ -524,12 +524,14 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) vhost_net_disable_vq(n, vq); rcu_assign_pointer(vq->private_data, sock); vhost_net_enable_vq(n, vq); - mutex_unlock(&vq->mutex); done: if (oldsock) { vhost_net_flush_vq(n, index); fput(oldsock->file); } + +err_vq: + mutex_unlock(&vq->mutex); err: mutex_unlock(&n->dev.mutex); return r; -- cgit v0.10.2 From 3f60ebc9d6291863652d564bacc430629271e6a9 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Thu, 11 Mar 2010 17:45:26 +0200 Subject: wl1251: fix potential crash In case debugfs does not init for some reason (or is disabled on older kernels) driver does not allocate stats.fw_stats structure, but tries to clear it later and trips on a NULL pointer: Unable to handle kernel NULL pointer dereference at virtual address 00000000 PC is at __memzero+0x24/0x80 Backtrace: [] (wl1251_debugfs_reset+0x0/0x30 [wl1251]) [] (wl1251_op_stop+0x0/0x12c [wl1251]) [] (ieee80211_stop_device+0x0/0x74 [mac80211]) [] (ieee80211_stop+0x0/0x4ac [mac80211]) [] (dev_close+0x0/0xb4) [] (dev_change_flags+0x0/0x184) [] (devinet_ioctl+0x0/0x704) [] (inet_ioctl+0x0/0x100) Add a NULL pointer check to fix this. Signed-off-by: Grazvydas Ignotas Acked-by: Kalle Valo Cc: stable@kernel.org Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/wl12xx/wl1251_debugfs.c b/drivers/net/wireless/wl12xx/wl1251_debugfs.c index 0ccba57..05e4d68 100644 --- a/drivers/net/wireless/wl12xx/wl1251_debugfs.c +++ b/drivers/net/wireless/wl12xx/wl1251_debugfs.c @@ -466,7 +466,8 @@ out: void wl1251_debugfs_reset(struct wl1251 *wl) { - memset(wl->stats.fw_stats, 0, sizeof(*wl->stats.fw_stats)); + if (wl->stats.fw_stats != NULL) + memset(wl->stats.fw_stats, 0, sizeof(*wl->stats.fw_stats)); wl->stats.retry_count = 0; wl->stats.excessive_retries = 0; } -- cgit v0.10.2 From 4fdec031b9169b3c17938b9c4168f099f457169c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 12 Mar 2010 04:02:43 +0100 Subject: ath9k: fix BUG_ON triggered by PAE frames When I initially stumbled upon sequence number problems with PAE frames in ath9k, I submitted a patch to remove all special cases for PAE frames and let them go through the normal transmit path. Out of concern about crypto incompatibility issues, this change was merged instead: commit 6c8afef551fef87a3bf24f8a74c69a7f2f72fc82 Author: Sujith Date: Tue Feb 9 10:07:00 2010 +0530 ath9k: Fix sequence numbers for PAE frames After a lot of testing, I'm able to reliably trigger a driver crash on rekeying with current versions with this change in place. It seems that the driver does not support sending out regular MPDUs with the same TID while an A-MPDU session is active. This leads to duplicate entries in the TID Tx buffer, which hits the following BUG_ON in ath_tx_addto_baw(): index = ATH_BA_INDEX(tid->seq_start, bf->bf_seqno); cindex = (tid->baw_head + index) & (ATH_TID_MAX_BUFS - 1); BUG_ON(tid->tx_buf[cindex] != NULL); I believe until we actually have a reproducible case of an incompatibility with another AP using no PAE special cases, we should simply get rid of this mess. This patch completely fixes my crash issues in STA mode and makes it stay connected without throughput drops or connectivity issues even when the AP is configured to a very short group rekey interval. Signed-off-by: Felix Fietkau Cc: stable@kernel.org Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index b2c8207..294b486 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -1353,25 +1353,6 @@ static enum ath9k_pkt_type get_hw_packet_type(struct sk_buff *skb) return htype; } -static bool is_pae(struct sk_buff *skb) -{ - struct ieee80211_hdr *hdr; - __le16 fc; - - hdr = (struct ieee80211_hdr *)skb->data; - fc = hdr->frame_control; - - if (ieee80211_is_data(fc)) { - if (ieee80211_is_nullfunc(fc) || - /* Port Access Entity (IEEE 802.1X) */ - (skb->protocol == cpu_to_be16(ETH_P_PAE))) { - return true; - } - } - - return false; -} - static int get_hw_crypto_keytype(struct sk_buff *skb) { struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); @@ -1696,7 +1677,7 @@ static void ath_tx_start_dma(struct ath_softc *sc, struct ath_buf *bf, goto tx_done; } - if ((tx_info->flags & IEEE80211_TX_CTL_AMPDU) && !is_pae(skb)) { + if (tx_info->flags & IEEE80211_TX_CTL_AMPDU) { /* * Try aggregation if it's a unicast data frame * and the destination is HT capable. -- cgit v0.10.2 From c8406ea8fa1adde8dc5400127281d497bbcdb84a Mon Sep 17 00:00:00 2001 From: Adel Gadllah Date: Sun, 14 Mar 2010 19:16:25 +0100 Subject: iwlwifi: Silence tfds_in_queue message Commit a239a8b47cc0e5e6d7416a89f340beac06d5edaa introduced a noisy message, that fills up the log very fast. The error seems not to be fatal (the connection is stable and performance is ok), so make it IWL_DEBUG_TX rather than IWL_ERR. Signed-off-by: Adel Gadllah Cc: stable@kernel.org Acked-by: Reinette Chatre Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index 1ed5206..8c12311 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -124,7 +124,7 @@ void iwl_free_tfds_in_queue(struct iwl_priv *priv, if (priv->stations[sta_id].tid[tid].tfds_in_queue >= freed) priv->stations[sta_id].tid[tid].tfds_in_queue -= freed; else { - IWL_ERR(priv, "free more than tfds_in_queue (%u:%d)\n", + IWL_DEBUG_TX(priv, "free more than tfds_in_queue (%u:%d)\n", priv->stations[sta_id].tid[tid].tfds_in_queue, freed); priv->stations[sta_id].tid[tid].tfds_in_queue = 0; -- cgit v0.10.2 From 0e255572121180c900e24e33b87047abd8153cce Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 8 Mar 2010 23:24:22 +0200 Subject: vhost: fix interrupt mitigation with raw sockets A thinko in code means we never trigger interrupt mitigation. Fix this. Reported-by: Juan Quintela Reported-by: Unai Uribarri Signed-off-by: Michael S. Tsirkin diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index fcafb6b..a6a88df 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -125,7 +125,7 @@ static void handle_tx(struct vhost_net *net) mutex_lock(&vq->mutex); vhost_disable_notify(vq); - if (wmem < sock->sk->sk_sndbuf * 2) + if (wmem < sock->sk->sk_sndbuf / 2) tx_poll_stop(net); hdr_size = vq->hdr_size; -- cgit v0.10.2 From 535297a6ae4c3b7a0562e71fac15c213eeec68e7 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 17 Mar 2010 16:06:11 +0200 Subject: vhost: fix error handling in vring ioctls Stanse found a locking problem in vhost_set_vring: several returns from VHOST_SET_VRING_KICK, VHOST_SET_VRING_CALL, VHOST_SET_VRING_ERR with the vq->mutex held. Fix these up. Reported-by: Jiri Slaby Acked-by: Laurent Chavey Signed-off-by: Michael S. Tsirkin diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 7cd55e0..7bd7a1e 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -476,8 +476,10 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) if (r < 0) break; eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); - if (IS_ERR(eventfp)) - return PTR_ERR(eventfp); + if (IS_ERR(eventfp)) { + r = PTR_ERR(eventfp); + break; + } if (eventfp != vq->kick) { pollstop = filep = vq->kick; pollstart = vq->kick = eventfp; @@ -489,8 +491,10 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) if (r < 0) break; eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); - if (IS_ERR(eventfp)) - return PTR_ERR(eventfp); + if (IS_ERR(eventfp)) { + r = PTR_ERR(eventfp); + break; + } if (eventfp != vq->call) { filep = vq->call; ctx = vq->call_ctx; @@ -505,8 +509,10 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) if (r < 0) break; eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); - if (IS_ERR(eventfp)) - return PTR_ERR(eventfp); + if (IS_ERR(eventfp)) { + r = PTR_ERR(eventfp); + break; + } if (eventfp != vq->error) { filep = vq->error; vq->error = eventfp; -- cgit v0.10.2 From 22001a13d09d82772e831dcdac0553994a4bac5d Mon Sep 17 00:00:00 2001 From: Tilman Schmidt Date: Wed, 17 Mar 2010 14:22:07 -0700 Subject: gigaset: fix build failure Update the dummy LL interface to the LL interface change introduced by commit daab433c03c15fd642c71c94eb51bdd3f32602c8. This fixes the build failure occurring after that commit when enabling ISDN_DRV_GIGASET but neither ISDN_I4L nor ISDN_CAPI. Impact: bugfix Signed-off-by: Tilman Schmidt Signed-off-by: David S. Miller diff --git a/drivers/isdn/gigaset/dummyll.c b/drivers/isdn/gigaset/dummyll.c index 5b27c99..bd0b1ea 100644 --- a/drivers/isdn/gigaset/dummyll.c +++ b/drivers/isdn/gigaset/dummyll.c @@ -57,12 +57,20 @@ void gigaset_isdn_stop(struct cardstate *cs) { } -int gigaset_isdn_register(struct cardstate *cs, const char *isdnid) +int gigaset_isdn_regdev(struct cardstate *cs, const char *isdnid) { - pr_info("no ISDN subsystem interface\n"); return 1; } -void gigaset_isdn_unregister(struct cardstate *cs) +void gigaset_isdn_unregdev(struct cardstate *cs) +{ +} + +void gigaset_isdn_regdrv(void) +{ + pr_info("no ISDN subsystem interface\n"); +} + +void gigaset_isdn_unregdrv(void) { } -- cgit v0.10.2 From b634f87522dff87712df8bda2a6c9061954d552a Mon Sep 17 00:00:00 2001 From: Alexandra Kossovsky Date: Thu, 18 Mar 2010 20:29:24 -0700 Subject: tcp: Fix OOB POLLIN avoidance. From: Alexandra.Kossovsky@oktetlabs.ru Fixes kernel bugzilla #15541 Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5901010..ae16f80 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -429,7 +429,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tp->urg_seq == tp->copied_seq && !sock_flag(sk, SOCK_URGINLINE) && tp->urg_data) - target--; + target++; /* Potential race condition. If read of tp below will * escape above sk->sk_state, we can be illegally awaken -- cgit v0.10.2 From 658cc524305c9759019c4430ded231f631472482 Mon Sep 17 00:00:00 2001 From: Abraham Arce Date: Tue, 16 Mar 2010 12:24:54 +0000 Subject: KS8851: Avoid NULL pointer in set rx mode Kernel NULL pointer dereference when setting mode for IFF_MULTICAST. Tested on SDP OMAP4430 board. ks8851 spi1.0: message enable is 0 ks8851 spi1.0: revision 0, MAC f2:f4:2f:56:37:de, IRQ 194 Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = c0004000 [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP last sysfs file: Modules linked in: CPU: 0 Not tainted (2.6.34-rc1-01039-g38d7ed1-dirty #3) PC is at ks8851_set_rx_mode+0x88/0x124 LR is at bitrev32+0x24/0x2c Backtrace: [] ? (ks8851_set_rx_mode+0x0/0x124) [] (__dev_set_rx_mode+0x0/0x90) [] (dev_mc_add+0x0/0x78) [] (igmp_group_added+0x0/0x64) [] (ip_mc_inc_group+0x0/0x150) [] (ip_mc_up+0x0/0x64) [] (inetdev_event+0x0/0x3d4) [] (notifier_call_chain+0x0/0x78) [] (__raw_notifier_call_chain+0x0/0x24) [] (raw_notifier_call_chain+0x0/0x28) [] (call_netdevice_notifiers+0x0/0x24) [] (__dev_notify_flags+0x0/0x68) [] (dev_change_flags+0x0/0x4c) [] (ip_auto_config+0x0/0xf1c) [] (do_one_initcall+0x0/0x1bc) [] (kernel_init+0x0/0x234) Code: e15130bc e1833012 e14130bc e5943000 (e5934000) ---[ end trace ed0fb00a94142792 ]--- Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Abraham Arce Signed-off-by: David S. Miller diff --git a/drivers/net/ks8851.c b/drivers/net/ks8851.c index 0573e0b..13cc1ca 100644 --- a/drivers/net/ks8851.c +++ b/drivers/net/ks8851.c @@ -976,7 +976,6 @@ static void ks8851_set_rx_mode(struct net_device *dev) crc >>= (32 - 6); /* get top six bits */ rxctrl.mchash[crc >> 4] |= (1 << (crc & 0xf)); - mcptr = mcptr->next; } rxctrl.rxcr1 = RXCR1_RXME | RXCR1_RXPAFMA; -- cgit v0.10.2 From 17da69b8bfbe441a33a873ad5dd7d3d85800bf2b Mon Sep 17 00:00:00 2001 From: Guo-Fu Tseng Date: Wed, 17 Mar 2010 00:09:29 +0000 Subject: jme: Fix VLAN memory leak Fix memory leak while receiving 8021q tagged packet which is not registered by user. Signed-off-by: Guo-Fu Tseng Cc: stable@kernel.org Signed-off-by: David S. Miller diff --git a/drivers/net/jme.c b/drivers/net/jme.c index 0f31497..cfc7b98 100644 --- a/drivers/net/jme.c +++ b/drivers/net/jme.c @@ -946,6 +946,8 @@ jme_alloc_and_feed_skb(struct jme_adapter *jme, int idx) jme->jme_vlan_rx(skb, jme->vlgrp, le16_to_cpu(rxdesc->descwb.vlan)); NET_STAT(jme).rx_bytes += 4; + } else { + dev_kfree_skb(skb); } } else { jme->jme_rx(skb); -- cgit v0.10.2 From bf5e5360fd1df1ae429ebbd81838d7d0879797d1 Mon Sep 17 00:00:00 2001 From: Guo-Fu Tseng Date: Wed, 17 Mar 2010 00:09:30 +0000 Subject: jme: Protect vlgrp structure by pause RX actions. Temporary stop the RX IRQ, and disable (sync) tasklet or napi. And restore it after finished the vlgrp pointer assignment. Signed-off-by: Guo-Fu Tseng Cc: stable@kernel.org Signed-off-by: David S. Miller diff --git a/drivers/net/jme.c b/drivers/net/jme.c index cfc7b98..c0b59a5 100644 --- a/drivers/net/jme.c +++ b/drivers/net/jme.c @@ -2083,12 +2083,45 @@ jme_tx_timeout(struct net_device *netdev) jme_reset_link(jme); } +static inline void jme_pause_rx(struct jme_adapter *jme) +{ + atomic_dec(&jme->link_changing); + + jme_set_rx_pcc(jme, PCC_OFF); + if (test_bit(JME_FLAG_POLL, &jme->flags)) { + JME_NAPI_DISABLE(jme); + } else { + tasklet_disable(&jme->rxclean_task); + tasklet_disable(&jme->rxempty_task); + } +} + +static inline void jme_resume_rx(struct jme_adapter *jme) +{ + struct dynpcc_info *dpi = &(jme->dpi); + + if (test_bit(JME_FLAG_POLL, &jme->flags)) { + JME_NAPI_ENABLE(jme); + } else { + tasklet_hi_enable(&jme->rxclean_task); + tasklet_hi_enable(&jme->rxempty_task); + } + dpi->cur = PCC_P1; + dpi->attempt = PCC_P1; + dpi->cnt = 0; + jme_set_rx_pcc(jme, PCC_P1); + + atomic_inc(&jme->link_changing); +} + static void jme_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp) { struct jme_adapter *jme = netdev_priv(netdev); + jme_pause_rx(jme); jme->vlgrp = grp; + jme_resume_rx(jme); } static void -- cgit v0.10.2 From 54d259d474e1fee6f6bb8f0f1360d85195199ac5 Mon Sep 17 00:00:00 2001 From: Guo-Fu Tseng Date: Wed, 17 Mar 2010 00:09:31 +0000 Subject: jme: Advance driver version number Advance driver version number after some bug fix. Signed-off-by: Guo-Fu Tseng Signed-off-by: David S. Miller diff --git a/drivers/net/jme.h b/drivers/net/jme.h index c19db91..07ad3a4 100644 --- a/drivers/net/jme.h +++ b/drivers/net/jme.h @@ -25,7 +25,7 @@ #define __JME_H_INCLUDED__ #define DRV_NAME "jme" -#define DRV_VERSION "1.0.5" +#define DRV_VERSION "1.0.6" #define PFX DRV_NAME ": " #define PCI_DEVICE_ID_JMICRON_JMC250 0x0250 -- cgit v0.10.2 From 0641e4fbf2f824faee00ea74c459a088d94905fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Mar 2010 21:16:45 -0700 Subject: net: Potential null skb->dev dereference When doing "ifenslave -d bond0 eth0", there is chance to get NULL dereference in netif_receive_skb(), because dev->master suddenly becomes NULL after we tested it. We should use ACCESS_ONCE() to avoid this (or rcu_dereference()) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c79a88b..fa8b476 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2059,12 +2059,12 @@ static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and * ARP on active-backup slaves with arp_validate enabled. */ -static inline int skb_bond_should_drop(struct sk_buff *skb) +static inline int skb_bond_should_drop(struct sk_buff *skb, + struct net_device *master) { - struct net_device *dev = skb->dev; - struct net_device *master = dev->master; - if (master) { + struct net_device *dev = skb->dev; + if (master->priv_flags & IFF_MASTER_ARPMON) dev->last_rx = jiffies; diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index c0316e0..c584a0a 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -11,7 +11,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, if (netpoll_rx(skb)) return NET_RX_DROP; - if (skb_bond_should_drop(skb)) + if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) goto drop; skb->skb_iif = skb->dev->ifindex; @@ -83,7 +83,7 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, { struct sk_buff *p; - if (skb_bond_should_drop(skb)) + if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) goto drop; skb->skb_iif = skb->dev->ifindex; diff --git a/net/core/dev.c b/net/core/dev.c index bcc490c..59d4394 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2483,6 +2483,7 @@ int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; + struct net_device *master; struct net_device *null_or_orig; struct net_device *null_or_bond; int ret = NET_RX_DROP; @@ -2503,11 +2504,12 @@ int netif_receive_skb(struct sk_buff *skb) null_or_orig = NULL; orig_dev = skb->dev; - if (orig_dev->master) { - if (skb_bond_should_drop(skb)) + master = ACCESS_ONCE(orig_dev->master); + if (master) { + if (skb_bond_should_drop(skb, master)) null_or_orig = orig_dev; /* deliver only exact match */ else - skb->dev = orig_dev->master; + skb->dev = master; } __get_cpu_var(netdev_rx_stat).total++; -- cgit v0.10.2 From 1097cd17700c4e9903b7bbfcec1432f61784cb53 Mon Sep 17 00:00:00 2001 From: Mallikarjuna R Chilakala Date: Thu, 18 Mar 2010 14:34:52 +0000 Subject: ixgbe: Fix 82599 multispeed fiber link issues due to Tx laser flapping Fix 82599 link issues during driver load and unload test using multi-speed 10G & 1G fiber modules. When connected back to back sometime 82599 multispeed fiber modules would link at 1G speed instead of 10G highest speed, due to a race condition in autotry process involving Tx laser flapping. Move autotry autoneg-37 tx laser flapping process from multispeed module init setup to driver unload. This will alert the link partner to restart its autotry process when it tries to establish the link with the link partner Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ixgbe/ixgbe_82599.c b/drivers/net/ixgbe/ixgbe_82599.c index 1f30e16..b405a00 100644 --- a/drivers/net/ixgbe/ixgbe_82599.c +++ b/drivers/net/ixgbe/ixgbe_82599.c @@ -39,6 +39,7 @@ #define IXGBE_82599_MC_TBL_SIZE 128 #define IXGBE_82599_VFT_TBL_SIZE 128 +void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw); s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, ixgbe_link_speed speed, bool autoneg, @@ -68,7 +69,9 @@ static void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw) if (hw->phy.multispeed_fiber) { /* Set up dual speed SFP+ support */ mac->ops.setup_link = &ixgbe_setup_mac_link_multispeed_fiber; + mac->ops.flap_tx_laser = &ixgbe_flap_tx_laser_multispeed_fiber; } else { + mac->ops.flap_tx_laser = NULL; if ((mac->ops.get_media_type(hw) == ixgbe_media_type_backplane) && (hw->phy.smart_speed == ixgbe_smart_speed_auto || @@ -413,6 +416,41 @@ s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw, } /** + * ixgbe_flap_tx_laser_multispeed_fiber - Flap Tx laser + * @hw: pointer to hardware structure + * + * When the driver changes the link speeds that it can support, + * it sets autotry_restart to true to indicate that we need to + * initiate a new autotry session with the link partner. To do + * so, we set the speed then disable and re-enable the tx laser, to + * alert the link partner that it also needs to restart autotry on its + * end. This is consistent with true clause 37 autoneg, which also + * involves a loss of signal. + **/ +void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw) +{ + u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP); + + hw_dbg(hw, "ixgbe_flap_tx_laser_multispeed_fiber\n"); + + if (hw->mac.autotry_restart) { + /* Disable tx laser; allow 100us to go dark per spec */ + esdp_reg |= IXGBE_ESDP_SDP3; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); + udelay(100); + + /* Enable tx laser; allow 100ms to light up */ + esdp_reg &= ~IXGBE_ESDP_SDP3; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); + msleep(100); + + hw->mac.autotry_restart = false; + } +} + +/** * ixgbe_setup_mac_link_multispeed_fiber - Set MAC link speed * @hw: pointer to hardware structure * @speed: new link speed @@ -440,16 +478,6 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, speed &= phy_link_speed; /* - * When the driver changes the link speeds that it can support, - * it sets autotry_restart to true to indicate that we need to - * initiate a new autotry session with the link partner. To do - * so, we set the speed then disable and re-enable the tx laser, to - * alert the link partner that it also needs to restart autotry on its - * end. This is consistent with true clause 37 autoneg, which also - * involves a loss of signal. - */ - - /* * Try each speed one by one, highest priority first. We do this in * software because 10gb fiber doesn't support speed autonegotiation. */ @@ -466,6 +494,7 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, /* Set the module link speed */ esdp_reg |= (IXGBE_ESDP_SDP5_DIR | IXGBE_ESDP_SDP5); IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); /* Allow module to change analog characteristics (1G->10G) */ msleep(40); @@ -478,19 +507,7 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, return status; /* Flap the tx laser if it has not already been done */ - if (hw->mac.autotry_restart) { - /* Disable tx laser; allow 100us to go dark per spec */ - esdp_reg |= IXGBE_ESDP_SDP3; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); - udelay(100); - - /* Enable tx laser; allow 2ms to light up per spec */ - esdp_reg &= ~IXGBE_ESDP_SDP3; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); - msleep(2); - - hw->mac.autotry_restart = false; - } + hw->mac.ops.flap_tx_laser(hw); /* * Wait for the controller to acquire link. Per IEEE 802.3ap, @@ -525,6 +542,7 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, esdp_reg &= ~IXGBE_ESDP_SDP5; esdp_reg |= IXGBE_ESDP_SDP5_DIR; IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); /* Allow module to change analog characteristics (10G->1G) */ msleep(40); @@ -537,19 +555,7 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, return status; /* Flap the tx laser if it has not already been done */ - if (hw->mac.autotry_restart) { - /* Disable tx laser; allow 100us to go dark per spec */ - esdp_reg |= IXGBE_ESDP_SDP3; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); - udelay(100); - - /* Enable tx laser; allow 2ms to light up per spec */ - esdp_reg &= ~IXGBE_ESDP_SDP3; - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); - msleep(2); - - hw->mac.autotry_restart = false; - } + hw->mac.ops.flap_tx_laser(hw); /* Wait for the link partner to also set speed */ msleep(100); diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 684af37..b858a1a 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -5018,6 +5018,7 @@ static void ixgbe_multispeed_fiber_task(struct work_struct *work) autoneg = hw->phy.autoneg_advertised; if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiation); + hw->mac.autotry_restart = false; if (hw->mac.ops.setup_link) hw->mac.ops.setup_link(hw, autoneg, negotiation, true); adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; @@ -6380,6 +6381,16 @@ static void __devexit ixgbe_remove(struct pci_dev *pdev) del_timer_sync(&adapter->sfp_timer); cancel_work_sync(&adapter->watchdog_task); cancel_work_sync(&adapter->sfp_task); + if (adapter->hw.phy.multispeed_fiber) { + struct ixgbe_hw *hw = &adapter->hw; + /* + * Restart clause 37 autoneg, disable and re-enable + * the tx laser, to clear & alert the link partner + * that it needs to restart autotry + */ + hw->mac.autotry_restart = true; + hw->mac.ops.flap_tx_laser(hw); + } cancel_work_sync(&adapter->multispeed_fiber_task); cancel_work_sync(&adapter->sfp_config_module_task); if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE || diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h index 2be9074..0ed5ab3 100644 --- a/drivers/net/ixgbe/ixgbe_type.h +++ b/drivers/net/ixgbe/ixgbe_type.h @@ -2397,6 +2397,7 @@ struct ixgbe_mac_operations { s32 (*enable_rx_dma)(struct ixgbe_hw *, u32); /* Link */ + void (*flap_tx_laser)(struct ixgbe_hw *); s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool, bool); s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *, bool); s32 (*get_link_capabilities)(struct ixgbe_hw *, ixgbe_link_speed *, -- cgit v0.10.2 From 0ecad5a262923967147e2d1725e277a2a5fbcdd4 Mon Sep 17 00:00:00 2001 From: Mallikarjuna R Chilakala Date: Thu, 18 Mar 2010 15:16:56 +0000 Subject: ixgbe: Fix 82599 KX4 Wake on LAN issue after an improper system shutdown Advanced Power Management is disabled for 82599 KX4 connections by clearing GRC.APME bit, causing it to not wake the system from an improper system shutdown. By default GRC.APME is enabled and software is not supposed to clear these settings during adapter probe. Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index b858a1a..18b5b21 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -6246,9 +6246,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, case IXGBE_DEV_ID_82599_KX4: adapter->wol = (IXGBE_WUFC_MAG | IXGBE_WUFC_EX | IXGBE_WUFC_MC | IXGBE_WUFC_BC); - /* Enable ACPI wakeup in GRC */ - IXGBE_WRITE_REG(hw, IXGBE_GRC, - (IXGBE_READ_REG(hw, IXGBE_GRC) & ~IXGBE_GRC_APME)); break; default: adapter->wol = 0; -- cgit v0.10.2 From 11bc3088373e913f165a8652601c6f8b8dc4aea2 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Thu, 18 Mar 2010 22:18:41 -0700 Subject: smsc95xx: Fix tx checksum offload for small packets TX checksum offload does not work properly when transmitting UDP packets with 0, 1 or 2 bytes of data. This patch works around the problem by calculating checksums for these packets in the driver. Signed-off-by: Steve Glendinning Signed-off-by: David S. Miller diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index d222d7e..73f9a31 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1189,9 +1189,21 @@ static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev, } if (csum) { - u32 csum_preamble = smsc95xx_calc_csum_preamble(skb); - skb_push(skb, 4); - memcpy(skb->data, &csum_preamble, 4); + if (skb->len <= 45) { + /* workaround - hardware tx checksum does not work + * properly with extremely small packets */ + long csstart = skb->csum_start - skb_headroom(skb); + __wsum calc = csum_partial(skb->data + csstart, + skb->len - csstart, 0); + *((__sum16 *)(skb->data + csstart + + skb->csum_offset)) = csum_fold(calc); + + csum = false; + } else { + u32 csum_preamble = smsc95xx_calc_csum_preamble(skb); + skb_push(skb, 4); + memcpy(skb->data, &csum_preamble, 4); + } } skb_push(skb, 4); -- cgit v0.10.2 From d11a4dc18bf41719c9f0d7ed494d295dd2973b92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Thu, 18 Mar 2010 23:20:20 +0000 Subject: ipv4: check rt_genid in dst_check Xfrm_dst keeps a reference to ipv4 rtable entries on each cached bundle. The only way to renew xfrm_dst when the underlying route has changed, is to implement dst_check for this. This is what ipv6 side does too. The problems started after 87c1e12b5eeb7b30b4b41291bef8e0b41fc3dde9 ("ipsec: Fix bogus bundle flowi") which fixed a bug causing xfrm_dst to not get reused, until that all lookups always generated new xfrm_dst with new route reference and path mtu worked. But after the fix, the old routes started to get reused even after they were expired causing pmtu to break (well it would occationally work if the rtable gc had run recently and marked the route obsolete causing dst_check to get called). Signed-off-by: Timo Teras Acked-by: Herbert Xu Signed-off-by: David S. Miller diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a770df24..32d3961 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1441,7 +1441,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, dev_hold(rt->u.dst.dev); if (rt->idev) in_dev_hold(rt->idev); - rt->u.dst.obsolete = 0; + rt->u.dst.obsolete = -1; rt->u.dst.lastuse = jiffies; rt->u.dst.path = &rt->u.dst; rt->u.dst.neighbour = NULL; @@ -1506,7 +1506,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) struct dst_entry *ret = dst; if (rt) { - if (dst->obsolete) { + if (dst->obsolete > 0) { ip_rt_put(rt); ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || @@ -1726,7 +1726,9 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { - return NULL; + if (rt_is_expired((struct rtable *)dst)) + return NULL; + return dst; } static void ipv4_dst_destroy(struct dst_entry *dst) @@ -1888,7 +1890,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (!rth) goto e_nobufs; - rth->u.dst.output= ip_rt_bug; + rth->u.dst.output = ip_rt_bug; + rth->u.dst.obsolete = -1; atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; @@ -2054,6 +2057,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->fl.oif = 0; rth->rt_spec_dst= spec_dst; + rth->u.dst.obsolete = -1; rth->u.dst.input = ip_forward; rth->u.dst.output = ip_output; rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); @@ -2218,6 +2222,7 @@ local_input: goto e_nobufs; rth->u.dst.output= ip_rt_bug; + rth->u.dst.obsolete = -1; rth->rt_genid = rt_genid(net); atomic_set(&rth->u.dst.__refcnt, 1); @@ -2444,6 +2449,7 @@ static int __mkroute_output(struct rtable **result, rth->rt_spec_dst= fl->fl4_src; rth->u.dst.output=ip_output; + rth->u.dst.obsolete = -1; rth->rt_genid = rt_genid(dev_net(dev_out)); RT_CACHE_STAT_INC(out_slow_tot); -- cgit v0.10.2 From 10414444cb8a8ee8893e00390b7cf40502e28352 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 18 Mar 2010 23:00:22 +0000 Subject: ipv6: Remove redundant dst NULL check in ip6_dst_check As the only path leading to ip6_dst_check makes an indirect call through dst->ops, dst cannot be NULL in ip6_dst_check. This patch removes this check in case it misleads people who come across this code. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 52cd3ef..7fcb0e5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -879,7 +879,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt = (struct rt6_info *) dst; - if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) return dst; return NULL; -- cgit v0.10.2 From 97e3ecd112ba45eb217cddab59f48659bc15d9d0 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 18 Mar 2010 11:27:32 +0000 Subject: TCP: check min TTL on received ICMP packets This adds RFC5082 checks for TTL on received ICMP packets. It adds some security against spoofed ICMP packets disrupting GTSM protected sessions. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 70df409..f4df5f9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -370,6 +370,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (sk->sk_state == TCP_CLOSE) goto out; + if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); + goto out; + } + icsk = inet_csk(sk); tp = tcp_sk(sk); seq = ntohl(th->seq); -- cgit v0.10.2 From 936332b8e00103fc20eb7e915c9a3bcb2835a11a Mon Sep 17 00:00:00 2001 From: Vasu Dev Date: Fri, 19 Mar 2010 04:33:10 +0000 Subject: ixgbe: fix for real_num_tx_queues update issue Currently netdev_features_change is called before fcoe tx queues setup is done, so this patch moves calling of netdev_features_change after tx queues setup is done in ixgbe_init_interrupt_scheme, so that real_num_tx_queues is updated correctly on each fcoe enable or disable. This allows additional fcoe queues updated correctly in vlan driver for their correct queue selection. Signed-off-by: Vasu Dev Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ixgbe/ixgbe_fcoe.c b/drivers/net/ixgbe/ixgbe_fcoe.c index 4123dec..700cfc0 100644 --- a/drivers/net/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ixgbe/ixgbe_fcoe.c @@ -614,9 +614,9 @@ int ixgbe_fcoe_enable(struct net_device *netdev) netdev->vlan_features |= NETIF_F_FSO; netdev->vlan_features |= NETIF_F_FCOE_MTU; netdev->fcoe_ddp_xid = IXGBE_FCOE_DDP_MAX - 1; - netdev_features_change(netdev); ixgbe_init_interrupt_scheme(adapter); + netdev_features_change(netdev); if (netif_running(netdev)) netdev->netdev_ops->ndo_open(netdev); @@ -660,11 +660,11 @@ int ixgbe_fcoe_disable(struct net_device *netdev) netdev->vlan_features &= ~NETIF_F_FSO; netdev->vlan_features &= ~NETIF_F_FCOE_MTU; netdev->fcoe_ddp_xid = 0; - netdev_features_change(netdev); ixgbe_cleanup_fcoe(adapter); - ixgbe_init_interrupt_scheme(adapter); + netdev_features_change(netdev); + if (netif_running(netdev)) netdev->netdev_ops->ndo_open(netdev); rc = 0; -- cgit v0.10.2 From fd3686a842717b890fbe3024b83a616c54d5dba0 Mon Sep 17 00:00:00 2001 From: Mallikarjuna R Chilakala Date: Fri, 19 Mar 2010 04:41:33 +0000 Subject: ixgbe: Set IXGBE_RSC_CB(skb)->DMA field to zero after unmapping the address As per Simon Horman's feedback set IXGBE_RSC_CB(skb)->dma to zero after unmapping HWRSC DMA address to avoid double freeing. Signed-off-by: Mallikarjuna R Chilakala Acked-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 18b5b21..d75c46f 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -935,10 +935,12 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, if (skb->prev) skb = ixgbe_transform_rsc_queue(skb, &(rx_ring->rsc_count)); if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { - if (IXGBE_RSC_CB(skb)->dma) + if (IXGBE_RSC_CB(skb)->dma) { pci_unmap_single(pdev, IXGBE_RSC_CB(skb)->dma, rx_ring->rx_buf_len, PCI_DMA_FROMDEVICE); + IXGBE_RSC_CB(skb)->dma = 0; + } if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) rx_ring->rsc_count += skb_shinfo(skb)->nr_frags; else @@ -3126,10 +3128,12 @@ static void ixgbe_clean_rx_ring(struct ixgbe_adapter *adapter, rx_buffer_info->skb = NULL; do { struct sk_buff *this = skb; - if (IXGBE_RSC_CB(this)->dma) + if (IXGBE_RSC_CB(this)->dma) { pci_unmap_single(pdev, IXGBE_RSC_CB(this)->dma, rx_ring->rx_buf_len, PCI_DMA_FROMDEVICE); + IXGBE_RSC_CB(this)->dma = 0; + } skb = skb->prev; dev_kfree_skb(this); } while (skb); -- cgit v0.10.2 From 33bd9f601ea21c4389870e425ae4eaf210d49b95 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 19 Mar 2010 02:59:52 +0000 Subject: ixgbevf: Fix VF Stats accounting after reset The counters in the 82599 Virtual Function are not clear on read. They accumulate to the maximum value and then roll over. They are also not cleared when the VF executes a soft reset, so it is possible they are non-zero when the driver loads and starts. This has all been accounted for in the code that keeps the stats up to date but there is one case that is not. When the PF driver is reset the counters in the VF are all reset to zero. This adds an additional accounting overhead into the VF driver when the PF is reset under its feet. This patch adds additional counters that are used by the VF driver to accumulate and save stats after a PF reset has been detected. Prior to this patch displaying the stats in the VF after the PF has reset would show bogus data. Signed-off-by: Greg Rose Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ixgbevf/ethtool.c b/drivers/net/ixgbevf/ethtool.c index 399be0c..6fdd651 100644 --- a/drivers/net/ixgbevf/ethtool.c +++ b/drivers/net/ixgbevf/ethtool.c @@ -46,22 +46,32 @@ struct ixgbe_stats { int sizeof_stat; int stat_offset; int base_stat_offset; + int saved_reset_offset; }; -#define IXGBEVF_STAT(m, b) sizeof(((struct ixgbevf_adapter *)0)->m), \ - offsetof(struct ixgbevf_adapter, m), \ - offsetof(struct ixgbevf_adapter, b) +#define IXGBEVF_STAT(m, b, r) sizeof(((struct ixgbevf_adapter *)0)->m), \ + offsetof(struct ixgbevf_adapter, m), \ + offsetof(struct ixgbevf_adapter, b), \ + offsetof(struct ixgbevf_adapter, r) static struct ixgbe_stats ixgbe_gstrings_stats[] = { - {"rx_packets", IXGBEVF_STAT(stats.vfgprc, stats.base_vfgprc)}, - {"tx_packets", IXGBEVF_STAT(stats.vfgptc, stats.base_vfgptc)}, - {"rx_bytes", IXGBEVF_STAT(stats.vfgorc, stats.base_vfgorc)}, - {"tx_bytes", IXGBEVF_STAT(stats.vfgotc, stats.base_vfgotc)}, - {"tx_busy", IXGBEVF_STAT(tx_busy, zero_base)}, - {"multicast", IXGBEVF_STAT(stats.vfmprc, stats.base_vfmprc)}, - {"rx_csum_offload_good", IXGBEVF_STAT(hw_csum_rx_good, zero_base)}, - {"rx_csum_offload_errors", IXGBEVF_STAT(hw_csum_rx_error, zero_base)}, - {"tx_csum_offload_ctxt", IXGBEVF_STAT(hw_csum_tx_good, zero_base)}, - {"rx_header_split", IXGBEVF_STAT(rx_hdr_split, zero_base)}, + {"rx_packets", IXGBEVF_STAT(stats.vfgprc, stats.base_vfgprc, + stats.saved_reset_vfgprc)}, + {"tx_packets", IXGBEVF_STAT(stats.vfgptc, stats.base_vfgptc, + stats.saved_reset_vfgptc)}, + {"rx_bytes", IXGBEVF_STAT(stats.vfgorc, stats.base_vfgorc, + stats.saved_reset_vfgorc)}, + {"tx_bytes", IXGBEVF_STAT(stats.vfgotc, stats.base_vfgotc, + stats.saved_reset_vfgotc)}, + {"tx_busy", IXGBEVF_STAT(tx_busy, zero_base, zero_base)}, + {"multicast", IXGBEVF_STAT(stats.vfmprc, stats.base_vfmprc, + stats.saved_reset_vfmprc)}, + {"rx_csum_offload_good", IXGBEVF_STAT(hw_csum_rx_good, zero_base, + zero_base)}, + {"rx_csum_offload_errors", IXGBEVF_STAT(hw_csum_rx_error, zero_base, + zero_base)}, + {"tx_csum_offload_ctxt", IXGBEVF_STAT(hw_csum_tx_good, zero_base, + zero_base)}, + {"rx_header_split", IXGBEVF_STAT(rx_hdr_split, zero_base, zero_base)}, }; #define IXGBE_QUEUE_STATS_LEN 0 @@ -455,10 +465,14 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, ixgbe_gstrings_stats[i].stat_offset; char *b = (char *)adapter + ixgbe_gstrings_stats[i].base_stat_offset; + char *r = (char *)adapter + + ixgbe_gstrings_stats[i].saved_reset_offset; data[i] = ((ixgbe_gstrings_stats[i].sizeof_stat == sizeof(u64)) ? *(u64 *)p : *(u32 *)p) - ((ixgbe_gstrings_stats[i].sizeof_stat == - sizeof(u64)) ? *(u64 *)b : *(u32 *)b); + sizeof(u64)) ? *(u64 *)b : *(u32 *)b) + + ((ixgbe_gstrings_stats[i].sizeof_stat == + sizeof(u64)) ? *(u64 *)r : *(u32 *)r); } } diff --git a/drivers/net/ixgbevf/ixgbevf_main.c b/drivers/net/ixgbevf/ixgbevf_main.c index ca653c4..43927e1 100644 --- a/drivers/net/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ixgbevf/ixgbevf_main.c @@ -1610,6 +1610,44 @@ static inline void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter, (adapter->rx_ring[rxr].count - 1)); } +static void ixgbevf_save_reset_stats(struct ixgbevf_adapter *adapter) +{ + /* Only save pre-reset stats if there are some */ + if (adapter->stats.vfgprc || adapter->stats.vfgptc) { + adapter->stats.saved_reset_vfgprc += adapter->stats.vfgprc - + adapter->stats.base_vfgprc; + adapter->stats.saved_reset_vfgptc += adapter->stats.vfgptc - + adapter->stats.base_vfgptc; + adapter->stats.saved_reset_vfgorc += adapter->stats.vfgorc - + adapter->stats.base_vfgorc; + adapter->stats.saved_reset_vfgotc += adapter->stats.vfgotc - + adapter->stats.base_vfgotc; + adapter->stats.saved_reset_vfmprc += adapter->stats.vfmprc - + adapter->stats.base_vfmprc; + } +} + +static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + adapter->stats.last_vfgprc = IXGBE_READ_REG(hw, IXGBE_VFGPRC); + adapter->stats.last_vfgorc = IXGBE_READ_REG(hw, IXGBE_VFGORC_LSB); + adapter->stats.last_vfgorc |= + (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGORC_MSB))) << 32); + adapter->stats.last_vfgptc = IXGBE_READ_REG(hw, IXGBE_VFGPTC); + adapter->stats.last_vfgotc = IXGBE_READ_REG(hw, IXGBE_VFGOTC_LSB); + adapter->stats.last_vfgotc |= + (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGOTC_MSB))) << 32); + adapter->stats.last_vfmprc = IXGBE_READ_REG(hw, IXGBE_VFMPRC); + + adapter->stats.base_vfgprc = adapter->stats.last_vfgprc; + adapter->stats.base_vfgorc = adapter->stats.last_vfgorc; + adapter->stats.base_vfgptc = adapter->stats.last_vfgptc; + adapter->stats.base_vfgotc = adapter->stats.last_vfgotc; + adapter->stats.base_vfmprc = adapter->stats.last_vfmprc; +} + static int ixgbevf_up_complete(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -1656,6 +1694,9 @@ static int ixgbevf_up_complete(struct ixgbevf_adapter *adapter) /* enable transmits */ netif_tx_start_all_queues(netdev); + ixgbevf_save_reset_stats(adapter); + ixgbevf_init_last_counter_stats(adapter); + /* bring the link up in the watchdog, this could race with our first * link up interrupt but shouldn't be a problem */ adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; @@ -2228,27 +2269,6 @@ out: return err; } -static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - - adapter->stats.last_vfgprc = IXGBE_READ_REG(hw, IXGBE_VFGPRC); - adapter->stats.last_vfgorc = IXGBE_READ_REG(hw, IXGBE_VFGORC_LSB); - adapter->stats.last_vfgorc |= - (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGORC_MSB))) << 32); - adapter->stats.last_vfgptc = IXGBE_READ_REG(hw, IXGBE_VFGPTC); - adapter->stats.last_vfgotc = IXGBE_READ_REG(hw, IXGBE_VFGOTC_LSB); - adapter->stats.last_vfgotc |= - (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGOTC_MSB))) << 32); - adapter->stats.last_vfmprc = IXGBE_READ_REG(hw, IXGBE_VFMPRC); - - adapter->stats.base_vfgprc = adapter->stats.last_vfgprc; - adapter->stats.base_vfgorc = adapter->stats.last_vfgorc; - adapter->stats.base_vfgptc = adapter->stats.last_vfgptc; - adapter->stats.base_vfgotc = adapter->stats.last_vfgotc; - adapter->stats.base_vfmprc = adapter->stats.last_vfmprc; -} - #define UPDATE_VF_COUNTER_32bit(reg, last_counter, counter) \ { \ u32 current_counter = IXGBE_READ_REG(hw, reg); \ @@ -2416,9 +2436,9 @@ static void ixgbevf_watchdog_task(struct work_struct *work) } } -pf_has_reset: ixgbevf_update_stats(adapter); +pf_has_reset: /* Force detection of hung controller every watchdog period */ adapter->detect_tx_hung = true; @@ -3390,8 +3410,6 @@ static int __devinit ixgbevf_probe(struct pci_dev *pdev, /* setup the private structure */ err = ixgbevf_sw_init(adapter); - ixgbevf_init_last_counter_stats(adapter); - #ifdef MAX_SKB_FRAGS netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM | @@ -3449,6 +3467,8 @@ static int __devinit ixgbevf_probe(struct pci_dev *pdev, adapter->netdev_registered = true; + ixgbevf_init_last_counter_stats(adapter); + /* print the MAC address */ hw_dbg(hw, "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n", netdev->dev_addr[0], diff --git a/drivers/net/ixgbevf/vf.h b/drivers/net/ixgbevf/vf.h index 799600e..1f31b05 100644 --- a/drivers/net/ixgbevf/vf.h +++ b/drivers/net/ixgbevf/vf.h @@ -157,6 +157,12 @@ struct ixgbevf_hw_stats { u64 vfgorc; u64 vfgotc; u64 vfmprc; + + u64 saved_reset_vfgprc; + u64 saved_reset_vfgptc; + u64 saved_reset_vfgorc; + u64 saved_reset_vfgotc; + u64 saved_reset_vfmprc; }; struct ixgbevf_info { -- cgit v0.10.2 From 4c3a822395c01d50ca2ba3aa4529e19d237a2f8c Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 19 Mar 2010 03:00:12 +0000 Subject: ixgbevf: Shorten up delay timer for watchdog task The recovery from PF reset works better when you shorten up the delay until the watchdog task executes. Signed-off-by: Greg Rose Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ixgbevf/ixgbevf_main.c b/drivers/net/ixgbevf/ixgbevf_main.c index 43927e1..3de93ae 100644 --- a/drivers/net/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ixgbevf/ixgbevf_main.c @@ -965,7 +965,7 @@ static irqreturn_t ixgbevf_msix_mbx(int irq, void *data) if ((msg & IXGBE_MBVFICR_VFREQ_MASK) == IXGBE_PF_CONTROL_MSG) mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + 10)); + round_jiffies(jiffies + 1)); return IRQ_HANDLED; } -- cgit v0.10.2 From 29b8dd024bd48c3d1d1e5140f5bbb683786f998e Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 19 Mar 2010 03:00:31 +0000 Subject: ixgbevf: Message formatting cleanups Clean up some text output formatting. Signed-off-by: Greg Rose Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ixgbevf/ixgbevf_main.c b/drivers/net/ixgbevf/ixgbevf_main.c index 3de93ae..d6cbd94 100644 --- a/drivers/net/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ixgbevf/ixgbevf_main.c @@ -2419,7 +2419,7 @@ static void ixgbevf_watchdog_task(struct work_struct *work) if (!netif_carrier_ok(netdev)) { hw_dbg(&adapter->hw, "NIC Link is Up %s, ", ((link_speed == IXGBE_LINK_SPEED_10GB_FULL) ? - "10 Gbps" : "1 Gbps")); + "10 Gbps\n" : "1 Gbps\n")); netif_carrier_on(netdev); netif_tx_wake_all_queues(netdev); } else { @@ -2695,7 +2695,7 @@ static int ixgbevf_open(struct net_device *netdev) if (hw->adapter_stopped) { err = IXGBE_ERR_MBX; printk(KERN_ERR "Unable to start - perhaps the PF" - "Driver isn't up yet\n"); + " Driver isn't up yet\n"); goto err_setup_reset; } } -- cgit v0.10.2 From b894fa2627e28c078740dc7041cd08c7e2c353ab Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Fri, 19 Mar 2010 06:07:48 +0000 Subject: igb: Add support for 82576 ET2 Quad Port Server Adapter Signed-off-by: Carolyn Wyborny Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/igb/e1000_82575.c b/drivers/net/igb/e1000_82575.c index 9d7fa2f..0bc990e 100644 --- a/drivers/net/igb/e1000_82575.c +++ b/drivers/net/igb/e1000_82575.c @@ -94,6 +94,7 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw) case E1000_DEV_ID_82576_FIBER: case E1000_DEV_ID_82576_SERDES: case E1000_DEV_ID_82576_QUAD_COPPER: + case E1000_DEV_ID_82576_QUAD_COPPER_ET2: case E1000_DEV_ID_82576_SERDES_QUAD: mac->type = e1000_82576; break; diff --git a/drivers/net/igb/e1000_hw.h b/drivers/net/igb/e1000_hw.h index 4480052..82a533f 100644 --- a/drivers/net/igb/e1000_hw.h +++ b/drivers/net/igb/e1000_hw.h @@ -41,6 +41,7 @@ struct e1000_hw; #define E1000_DEV_ID_82576_FIBER 0x10E6 #define E1000_DEV_ID_82576_SERDES 0x10E7 #define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 +#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 #define E1000_DEV_ID_82576_NS 0x150A #define E1000_DEV_ID_82576_NS_SERDES 0x1518 #define E1000_DEV_ID_82576_SERDES_QUAD 0x150D diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index 0ed25f0..45a0e4f 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -72,6 +72,7 @@ static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 }, -- cgit v0.10.2 From ea93fd9456ad32cd85b2d7914b58c6313cc40c9e Mon Sep 17 00:00:00 2001 From: Yegor Yefremov Date: Fri, 19 Mar 2010 22:43:29 -0700 Subject: KS8695: update ksp->next_rx_desc_read at the end of rx loop There is no need to adjust the next rx descriptor after each packet, so do it only once at the end of the routine. Signed-off-by: Eric Dumazet Signed-off-by: Yegor Yefremov diff --git a/drivers/net/arm/ks8695net.c b/drivers/net/arm/ks8695net.c index a1d4188..e7810b7 100644 --- a/drivers/net/arm/ks8695net.c +++ b/drivers/net/arm/ks8695net.c @@ -449,11 +449,10 @@ ks8695_rx_irq(int irq, void *dev_id) } /** - * ks8695_rx - Receive packets called by NAPI poll method + * ks8695_rx - Receive packets called by NAPI poll method * @ksp: Private data for the KS8695 Ethernet - * @budget: The max packets would be receive + * @budget: Number of packets allowed to process */ - static int ks8695_rx(struct ks8695_priv *ksp, int budget) { struct net_device *ndev = ksp->ndev; @@ -461,7 +460,6 @@ static int ks8695_rx(struct ks8695_priv *ksp, int budget) int buff_n; u32 flags; int pktlen; - int last_rx_processed = -1; int received = 0; buff_n = ksp->next_rx_desc_read; @@ -471,6 +469,7 @@ static int ks8695_rx(struct ks8695_priv *ksp, int budget) cpu_to_le32(RDES_OWN)))) { rmb(); flags = le32_to_cpu(ksp->rx_ring[buff_n].status); + /* Found an SKB which we own, this means we * received a packet */ @@ -533,23 +532,18 @@ rx_failure: ksp->rx_ring[buff_n].status = cpu_to_le32(RDES_OWN); rx_finished: received++; - /* And note this as processed so we can start - * from here next time - */ - last_rx_processed = buff_n; buff_n = (buff_n + 1) & MAX_RX_DESC_MASK; - /*And note which RX descriptor we last did */ - if (likely(last_rx_processed != -1)) - ksp->next_rx_desc_read = - (last_rx_processed + 1) & - MAX_RX_DESC_MASK; } + + /* And note which RX descriptor we last did */ + ksp->next_rx_desc_read = buff_n; + /* And refill the buffers */ ks8695_refill_rxbuffers(ksp); - /* Kick the RX DMA engine, in case it became - * suspended */ + /* Kick the RX DMA engine, in case it became suspended */ ks8695_writereg(ksp, KS8695_DRSC, 0); + return received; } -- cgit v0.10.2 From a50436f2cd6e85794f7e1aad795ca8302177b896 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Mar 2010 06:04:14 +0000 Subject: net: ipmr/ip6mr: fix potential out-of-bounds vif_table access mfc_parent of cache entries is used to index into the vif_table and is initialised from mfcctl->mfcc_parent. This can take values of to 2^16-1, while the vif_table has only MAXVIFS (32) entries. The same problem affects ip6mr. Refuse invalid values to fix a potential out-of-bounds access. Unlike the other validity checks, this is checked in ipmr_mfc_add() instead of the setsockopt handler since its unused in the delete path and might be uninitialized. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8582e12..0b9d03c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -802,6 +802,9 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) int line; struct mfc_cache *uc, *c, **cp; + if (mfc->mfcc_parent >= MAXVIFS) + return -ENFILE; + line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); for (cp = &net->ipv4.mfc_cache_array[line]; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 52e0f74..23e4ac0 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1113,6 +1113,9 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) unsigned char ttls[MAXMIFS]; int i; + if (mfc->mf6cc_parent >= MAXMIFS) + return -ENFILE; + memset(ttls, 255, MAXMIFS); for (i = 0; i < MAXMIFS; i++) { if (IF_ISSET(i, &mfc->mf6cc_ifset)) -- cgit v0.10.2 From 6830c25b7d08fbbd922959425193791bc42079f2 Mon Sep 17 00:00:00 2001 From: Lennart Schulte Date: Wed, 17 Mar 2010 02:16:29 +0000 Subject: tcp: Fix tcp_mark_head_lost() with packets == 0 A packet is marked as lost in case packets == 0, although nothing should be done. This results in a too early retransmitted packet during recovery in some cases. This small patch fixes this issue by returning immediately. Signed-off-by: Lennart Schulte Signed-off-by: Arnd Hannemann Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 788851c..c096a42 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2511,6 +2511,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) int err; unsigned int mss; + if (packets == 0) + return; + WARN_ON(packets > tp->packets_out); if (tp->lost_skb_hint) { skb = tp->lost_skb_hint; -- cgit v0.10.2 From f5d410f2ea7ba340f11815a56e05b9fa9421c421 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 16 Mar 2010 13:30:44 +0000 Subject: netlink: fix unaligned access in nla_get_be64() This patch fixes a unaligned access in nla_get_be64() that was introduced by myself in a17c859849402315613a0015ac8fbf101acf0cc1. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller diff --git a/include/net/netlink.h b/include/net/netlink.h index f82e463..4fc05b5 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -945,7 +945,11 @@ static inline u64 nla_get_u64(const struct nlattr *nla) */ static inline __be64 nla_get_be64(const struct nlattr *nla) { - return *(__be64 *) nla_data(nla); + __be64 tmp; + + nla_memcpy(&tmp, nla, sizeof(tmp)); + + return tmp; } /** -- cgit v0.10.2 From 73852e8151b7d7a529fbe019ab6d2d0c02d8f3f2 Mon Sep 17 00:00:00 2001 From: "Steven J. Magnani" Date: Tue, 16 Mar 2010 05:22:44 +0000 Subject: NET_DMA: free skbs periodically Under NET_DMA, data transfer can grind to a halt when userland issues a large read on a socket with a high RCVLOWAT (i.e., 512 KB for both). This appears to be because the NET_DMA design queues up lots of memcpy operations, but doesn't issue or wait for them (and thus free the associated skbs) until it is time for tcp_recvmesg() to return. The socket hangs when its TCP window goes to zero before enough data is available to satisfy the read. Periodically issue asynchronous memcpy operations, and free skbs for ones that have completed, to prevent sockets from going into zero-window mode. Signed-off-by: Steven J. Magnani Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ae16f80..6afb6d8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1254,6 +1254,39 @@ static void tcp_prequeue_process(struct sock *sk) tp->ucopy.memory = 0; } +#ifdef CONFIG_NET_DMA +static void tcp_service_net_dma(struct sock *sk, bool wait) +{ + dma_cookie_t done, used; + dma_cookie_t last_issued; + struct tcp_sock *tp = tcp_sk(sk); + + if (!tp->ucopy.dma_chan) + return; + + last_issued = tp->ucopy.dma_cookie; + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + + do { + if (dma_async_memcpy_complete(tp->ucopy.dma_chan, + last_issued, &done, + &used) == DMA_SUCCESS) { + /* Safe to free early-copied skbs now */ + __skb_queue_purge(&sk->sk_async_wait_queue); + break; + } else { + struct sk_buff *skb; + while ((skb = skb_peek(&sk->sk_async_wait_queue)) && + (dma_async_is_complete(skb->dma_cookie, done, + used) == DMA_SUCCESS)) { + __skb_dequeue(&sk->sk_async_wait_queue); + kfree_skb(skb); + } + } + } while (wait); +} +#endif + static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) { struct sk_buff *skb; @@ -1546,6 +1579,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* __ Set realtime policy in scheduler __ */ } +#ifdef CONFIG_NET_DMA + if (tp->ucopy.dma_chan) + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); +#endif if (copied >= target) { /* Do not sleep, just process backlog. */ release_sock(sk); @@ -1554,6 +1591,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sk_wait_data(sk, &timeo); #ifdef CONFIG_NET_DMA + tcp_service_net_dma(sk, false); /* Don't block */ tp->ucopy.wakeup = 0; #endif @@ -1633,6 +1671,9 @@ do_prequeue: copied = -EFAULT; break; } + + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + if ((offset + used) == skb->len) copied_early = 1; @@ -1702,27 +1743,9 @@ skip_copy: } #ifdef CONFIG_NET_DMA - if (tp->ucopy.dma_chan) { - dma_cookie_t done, used; - - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); - - while (dma_async_memcpy_complete(tp->ucopy.dma_chan, - tp->ucopy.dma_cookie, &done, - &used) == DMA_IN_PROGRESS) { - /* do partial cleanup of sk_async_wait_queue */ - while ((skb = skb_peek(&sk->sk_async_wait_queue)) && - (dma_async_is_complete(skb->dma_cookie, done, - used) == DMA_SUCCESS)) { - __skb_dequeue(&sk->sk_async_wait_queue); - kfree_skb(skb); - } - } + tcp_service_net_dma(sk, true); /* Wait for queue to drain */ + tp->ucopy.dma_chan = NULL; - /* Safe to free early-copied skbs now */ - __skb_queue_purge(&sk->sk_async_wait_queue); - tp->ucopy.dma_chan = NULL; - } if (tp->ucopy.pinned_list) { dma_unpin_iovec_pages(tp->ucopy.pinned_list); tp->ucopy.pinned_list = NULL; -- cgit v0.10.2 From 1a50307ba1826e4da0024e64b245ce4eadf7688a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 18 Mar 2010 14:24:42 +0000 Subject: netlink: fix NETLINK_RECV_NO_ENOBUFS in netlink_set_err() Currently, ENOBUFS errors are reported to the socket via netlink_set_err() even if NETLINK_RECV_NO_ENOBUFS is set. However, that should not happen. This fixes this problem and it changes the prototype of netlink_set_err() to return the number of sockets that have set the NETLINK_RECV_NO_ENOBUFS socket option. This return value is used in the next patch in these bugfix series. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller diff --git a/include/linux/netlink.h b/include/linux/netlink.h index fde27c0..6eaca5e 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -188,7 +188,7 @@ extern int netlink_has_listeners(struct sock *sk, unsigned int group); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, __u32 group, gfp_t allocation); -extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); +extern int netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); extern int netlink_register_notifier(struct notifier_block *nb); extern int netlink_unregister_notifier(struct notifier_block *nb); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 320d042..acbbae1 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1093,6 +1093,7 @@ static inline int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) { struct netlink_sock *nlk = nlk_sk(sk); + int ret = 0; if (sk == p->exclude_sk) goto out; @@ -1104,10 +1105,15 @@ static inline int do_one_set_err(struct sock *sk, !test_bit(p->group - 1, nlk->groups)) goto out; + if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) { + ret = 1; + goto out; + } + sk->sk_err = p->code; sk->sk_error_report(sk); out: - return 0; + return ret; } /** @@ -1116,12 +1122,16 @@ out: * @pid: the PID of a process that we want to skip (if any) * @groups: the broadcast group that will notice the error * @code: error code, must be negative (as usual in kernelspace) + * + * This function returns the number of broadcast listeners that have set the + * NETLINK_RECV_NO_ENOBUFS socket option. */ -void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) +int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) { struct netlink_set_err_data info; struct hlist_node *node; struct sock *sk; + int ret = 0; info.exclude_sk = ssk; info.pid = pid; @@ -1132,9 +1142,10 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) read_lock(&nl_table_lock); sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) - do_one_set_err(sk, &info); + ret += do_one_set_err(sk, &info); read_unlock(&nl_table_lock); + return ret; } EXPORT_SYMBOL(netlink_set_err); -- cgit v0.10.2 From 37b7ef7203240b3aba577bb1ff6765fe15225976 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 16 Mar 2010 13:30:21 +0000 Subject: netfilter: ctnetlink: fix reliable event delivery if message building fails This patch fixes a bug that allows to lose events when reliable event delivery mode is used, ie. if NETLINK_BROADCAST_SEND_ERROR and NETLINK_RECV_NO_ENOBUFS socket options are set. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 5392386..361d6b5 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -76,7 +76,7 @@ extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); extern int nfnetlink_has_listeners(struct net *net, unsigned int group); extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo, gfp_t flags); -extern void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error); +extern int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error); extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags); extern void nfnl_lock(void); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2b2af63..569410a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -582,7 +582,9 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(net, 0, group, -ENOBUFS); + if (nfnetlink_set_err(net, 0, group, -ENOBUFS) > 0) + return -ENOBUFS; + return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8eb0cc2..6afa3d5 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -113,9 +113,9 @@ int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, } EXPORT_SYMBOL_GPL(nfnetlink_send); -void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) +int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) { - netlink_set_err(net->nfnl, pid, group, error); + return netlink_set_err(net->nfnl, pid, group, error); } EXPORT_SYMBOL_GPL(nfnetlink_set_err); -- cgit v0.10.2