diff options
author | Michael Chan <mchan@broadcom.com> | 2006-08-08 04:46:02 (GMT) |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2006-08-08 04:46:02 (GMT) |
commit | 1b2a720506ccf7c30baaeda5d990c29b31e21726 (patch) | |
tree | 5990f56e364077c83563566ed64beae95b855a9f | |
parent | bd37a088596ccdb2b2dd3299e25e333bca7a9a34 (diff) | |
download | linux-1b2a720506ccf7c30baaeda5d990c29b31e21726.tar.xz |
[TG3]: Fix tx race condition
Fix a subtle race condition between tg3_start_xmit() and tg3_tx()
discovered by Herbert Xu <herbert@gondor.apana.org.au>:
CPU0 CPU1
tg3_start_xmit()
if (tx_ring_full) {
tx_lock
tg3_tx()
if (!netif_queue_stopped)
netif_stop_queue()
if (!tx_ring_full)
update_tx_ring
netif_wake_queue()
tx_unlock
}
Even though tx_ring is updated before the if statement in tg3_tx() in
program order, it can be re-ordered by the CPU as shown above. This
scenario can cause the tx queue to be stopped forever if tg3_tx() has
just freed up the entire tx_ring. The possibility of this happening
should be very rare though.
The following changes are made:
1. Add memory barrier to fix the above race condition.
2. Eliminate the private tx_lock altogether and rely solely on
netif_tx_lock. This eliminates one spinlock in tg3_start_xmit()
when the ring is full.
3. Because of 2, use netif_tx_lock in tg3_tx() before calling
netif_wake_queue().
4. Change TX_BUFFS_AVAIL to an inline function with a memory barrier.
Herbert and David suggested using the memory barrier instead of
volatile.
5. Check for the full wake queue condition before getting
netif_tx_lock in tg3_tx(). This reduces the number of unnecessary
spinlocks when the tx ring is full in a steady-state condition.
6. Update version to 3.65.
Signed-off-by: Michael Chan <mchan@broadcom.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/tg3.c | 49 | ||||
-rw-r--r-- | drivers/net/tg3.h | 8 |
2 files changed, 31 insertions, 26 deletions
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 0afbed6..eafabb2 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -68,8 +68,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.64" -#define DRV_MODULE_RELDATE "July 31, 2006" +#define DRV_MODULE_VERSION "3.65" +#define DRV_MODULE_RELDATE "August 07, 2006" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 @@ -123,9 +123,6 @@ TG3_RX_RCB_RING_SIZE(tp)) #define TG3_TX_RING_BYTES (sizeof(struct tg3_tx_buffer_desc) * \ TG3_TX_RING_SIZE) -#define TX_BUFFS_AVAIL(TP) \ - ((TP)->tx_pending - \ - (((TP)->tx_prod - (TP)->tx_cons) & (TG3_TX_RING_SIZE - 1))) #define NEXT_TX(N) (((N) + 1) & (TG3_TX_RING_SIZE - 1)) #define RX_PKT_BUF_SZ (1536 + tp->rx_offset + 64) @@ -2987,6 +2984,13 @@ static void tg3_tx_recover(struct tg3 *tp) spin_unlock(&tp->lock); } +static inline u32 tg3_tx_avail(struct tg3 *tp) +{ + smp_mb(); + return (tp->tx_pending - + ((tp->tx_prod - tp->tx_cons) & (TG3_TX_RING_SIZE - 1))); +} + /* Tigon3 never reports partial packet sends. So we do not * need special logic to handle SKBs that have not had all * of their frags sent yet, like SunGEM does. @@ -3038,12 +3042,20 @@ static void tg3_tx(struct tg3 *tp) tp->tx_cons = sw_idx; - if (unlikely(netif_queue_stopped(tp->dev))) { - spin_lock(&tp->tx_lock); + /* Need to make the tx_cons update visible to tg3_start_xmit() + * before checking for netif_queue_stopped(). Without the + * memory barrier, there is a small possibility that tg3_start_xmit() + * will miss it and cause the queue to be stopped forever. + */ + smp_mb(); + + if (unlikely(netif_queue_stopped(tp->dev) && + (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH))) { + netif_tx_lock(tp->dev); if (netif_queue_stopped(tp->dev) && - (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH)) + (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH)) netif_wake_queue(tp->dev); - spin_unlock(&tp->tx_lock); + netif_tx_unlock(tp->dev); } } @@ -3795,7 +3807,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) * interrupt. Furthermore, IRQ processing runs lockless so we have * no IRQ context deadlocks to worry about either. Rejoice! */ - if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { + if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { if (!netif_queue_stopped(dev)) { netif_stop_queue(dev); @@ -3891,12 +3903,10 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry); tp->tx_prod = entry; - if (unlikely(TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))) { - spin_lock(&tp->tx_lock); + if (unlikely(tg3_tx_avail(tp) <= (MAX_SKB_FRAGS + 1))) { netif_stop_queue(dev); - if (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH) + if (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH) netif_wake_queue(tp->dev); - spin_unlock(&tp->tx_lock); } out_unlock: @@ -3918,7 +3928,7 @@ static int tg3_tso_bug(struct tg3 *tp, struct sk_buff *skb) struct sk_buff *segs, *nskb; /* Estimate the number of fragments in the worst case */ - if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->gso_segs * 3))) { + if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->gso_segs * 3))) { netif_stop_queue(tp->dev); return NETDEV_TX_BUSY; } @@ -3958,7 +3968,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) * interrupt. Furthermore, IRQ processing runs lockless so we have * no IRQ context deadlocks to worry about either. Rejoice! */ - if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { + if (unlikely(tg3_tx_avail(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { if (!netif_queue_stopped(dev)) { netif_stop_queue(dev); @@ -4108,12 +4118,10 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) tw32_tx_mbox((MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW), entry); tp->tx_prod = entry; - if (unlikely(TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))) { - spin_lock(&tp->tx_lock); + if (unlikely(tg3_tx_avail(tp) <= (MAX_SKB_FRAGS + 1))) { netif_stop_queue(dev); - if (TX_BUFFS_AVAIL(tp) > TG3_TX_WAKEUP_THRESH) + if (tg3_tx_avail(tp) > TG3_TX_WAKEUP_THRESH) netif_wake_queue(tp->dev); - spin_unlock(&tp->tx_lock); } out_unlock: @@ -11472,7 +11480,6 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, tp->grc_mode |= GRC_MODE_BSWAP_NONFRM_DATA; #endif spin_lock_init(&tp->lock); - spin_lock_init(&tp->tx_lock); spin_lock_init(&tp->indirect_lock); INIT_WORK(&tp->reset_task, tg3_reset_task, tp); diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index ba2c987..3ecf356 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -2079,9 +2079,9 @@ struct tg3 { * lock: Held during reset, PHY access, timer, and when * updating tg3_flags and tg3_flags2. * - * tx_lock: Held during tg3_start_xmit and tg3_tx only - * when calling netif_[start|stop]_queue. - * tg3_start_xmit is protected by netif_tx_lock. + * netif_tx_lock: Held during tg3_start_xmit. tg3_tx holds + * netif_tx_lock when it needs to call + * netif_wake_queue. * * Both of these locks are to be held with BH safety. * @@ -2118,8 +2118,6 @@ struct tg3 { u32 tx_cons; u32 tx_pending; - spinlock_t tx_lock; - struct tg3_tx_buffer_desc *tx_ring; struct tx_ring_info *tx_buffers; dma_addr_t tx_desc_mapping; |