From 4e5b6d006b61b6b998e15da850d959520f811b4c Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Fri, 8 May 2009 14:02:32 -0700 Subject: wimax/i2400m: fix device crash: fix optimization in _roq_queue_update_ws When the i2400m receives data and the device indicates there has to be reordering, we keep an sliding window implementation to sort the packets before sending them to the network stack. One of the "operations" that the device indicates is "queue a packet and update the window start". When the queue is empty, this is equivalent to "deliver the packet and update the window start". That case was optimized in i2400m_roq_queue_update_ws() so that we would not pointlessly queue and dequeue a packet. However, when the optimization was active, it wasn't updating the window start. That caused the reorder management code to get confused later on with what seemed to be wrong reorder requests from the device. Thus the fix implemented is to do the right thing and update the window start in both cases, when the queue is empty (and the optimization is done) and when not. Signed-off-by: Inaky Perez-Gonzalez diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/net/wimax/i2400m/rx.c index 02419bf..f9fc389 100644 --- a/drivers/net/wimax/i2400m/rx.c +++ b/drivers/net/wimax/i2400m/rx.c @@ -819,10 +819,9 @@ void i2400m_roq_queue_update_ws(struct i2400m *i2400m, struct i2400m_roq *roq, roq_data = (struct i2400m_roq_data *) &skb->cb; i2400m_net_erx(i2400m, skb, roq_data->cs); } - else { + else __i2400m_roq_queue(i2400m, roq, skb, sn, nsn); - __i2400m_roq_update_ws(i2400m, roq, sn + 1); - } + __i2400m_roq_update_ws(i2400m, roq, sn + 1); i2400m_roq_log_add(i2400m, roq, I2400M_RO_TYPE_PACKET_WS, old_ws, len, sn, nsn, roq->ws); } -- cgit v0.10.2 From 5e392739d6ab72f7c35040aa07f4097904bce6e7 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 11 May 2009 00:36:35 +0000 Subject: netpoll: don't dereference NULL dev from np It looks like the dev in netpoll_poll can be NULL - at lease it's checked at the function beginning. Thus the dev->netde_ops dereference looks dangerous. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b5873bd..64f51ee 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -175,9 +175,13 @@ static void service_arp_queue(struct netpoll_info *npi) void netpoll_poll(struct netpoll *np) { struct net_device *dev = np->dev; - const struct net_device_ops *ops = dev->netdev_ops; + const struct net_device_ops *ops; + + if (!dev || !netif_running(dev)) + return; - if (!dev || !netif_running(dev) || !ops->ndo_poll_controller) + ops = dev->netdev_ops; + if (!ops->ndo_poll_controller) return; /* Process pending work on NIC */ -- cgit v0.10.2 From 2513dfb83fc775364fe85803d3a84d7ebe5763a5 Mon Sep 17 00:00:00 2001 From: Chris Friesen Date: Sun, 17 May 2009 20:39:33 -0700 Subject: ipconfig: handle case of delayed DHCP server If a DHCP server is delayed, it's possible for the client to receive the DHCPOFFER after it has already sent out a new DHCPDISCOVER message from a second interface. The client then sends out a DHCPREQUEST from the second interface, but the server doesn't recognize the device and rejects the request. This patch simply tracks the current device being configured and throws away the OFFER if it is not intended for the current device. A more sophisticated approach would be to put the OFFER information into the struct ic_device rather than storing it globally. Signed-off-by: Chris Friesen Signed-off-by: David S. Miller diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 90d22ae..88bf051 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -139,6 +139,8 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */ __be32 root_server_addr = NONE; /* Address of NFS server */ u8 root_server_path[256] = { 0, }; /* Path to mount as root */ +u32 ic_dev_xid; /* Device under configuration */ + /* vendor class identifier */ static char vendor_class_identifier[253] __initdata; @@ -932,6 +934,13 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str goto drop_unlock; } + /* Is it a reply for the device we are configuring? */ + if (b->xid != ic_dev_xid) { + if (net_ratelimit()) + printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n"); + goto drop_unlock; + } + /* Parse extensions */ if (ext_len >= 4 && !memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */ @@ -1115,6 +1124,9 @@ static int __init ic_dynamic(void) get_random_bytes(&timeout, sizeof(timeout)); timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); for (;;) { + /* Track the device we are configuring */ + ic_dev_xid = d->xid; + #ifdef IPCONFIG_BOOTP if (do_bootp && (d->able & IC_BOOTP)) ic_bootp_send_if(d, jiffies - start_jiffies); -- cgit v0.10.2 From 72876a603422d3767273ffb3918033fa1bfda0f3 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Sun, 17 May 2009 20:48:59 -0700 Subject: mlx4_en: Fix not deleted napi structures Napi structures are being created each time we open a port, but when the port is closed the napi structure is only disabled but not removed. This bug caused hang while removing the driver. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller diff --git a/drivers/net/mlx4/en_cq.c b/drivers/net/mlx4/en_cq.c index 91f50de..a276125 100644 --- a/drivers/net/mlx4/en_cq.c +++ b/drivers/net/mlx4/en_cq.c @@ -125,8 +125,10 @@ void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) if (cq->is_tx) del_timer(&cq->timer); - else + else { napi_disable(&cq->napi); + netif_napi_del(&cq->napi); + } mlx4_cq_free(mdev->dev, &cq->mcq); } -- cgit v0.10.2 From a8f492c6ac6072d6c5bce4b2f13dc44612991051 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sat, 16 May 2009 01:21:58 +0000 Subject: NET: Meth: Fix unsafe mix of irq and non-irq spinlocks. Mixing of normal and irq spinlocks results in the following lockdep messages on bootup on IP32: [...] Sending DHCP requests . ====================================================== [ INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected ] 2.6.30-rc5-00164-g41baeef #30 ------------------------------------------------------ swapper/1 [HC0[0]:SC0[1]:HE0:SE0] is trying to acquire: (&priv->meth_lock){+.+...}, at: [] meth_tx+0x48/0x43c and this task is already holding: (_xmit_ETHER#2){+.-...}, at: [] __qdisc_run+0x118/0x30c which would create a new lock dependency: (_xmit_ETHER#2){+.-...} -> (&priv->meth_lock){+.+...} but this new dependency connects a SOFTIRQ-irq-safe lock: (_xmit_ETHER#2){+.-...} ... which became SOFTIRQ-irq-safe at: [] __lock_acquire+0x784/0x1a14 [] lock_acquire+0xf8/0x150 [] _spin_lock+0x30/0x44 [] dev_watchdog+0x70/0x398 [] run_timer_softirq+0x1a8/0x248 [] __do_softirq+0xec/0x208 [] do_softirq+0x60/0xe4 [] irq_exit+0x54/0x9c [] ret_from_irq+0x0/0x4 [] r4k_wait+0x20/0x40 [] cpu_idle+0x30/0x60 [] start_kernel+0x3ec/0x404 to a SOFTIRQ-irq-unsafe lock: (&priv->meth_lock){+.+...} ... which became SOFTIRQ-irq-unsafe at: ... [] __lock_acquire+0x824/0x1a14 [] lock_acquire+0xf8/0x150 [] _spin_lock+0x30/0x44 [] meth_reset+0x118/0x2d8 [] meth_open+0x28/0x140 [] dev_open+0xe0/0x18c [] dev_change_flags+0xd8/0x1d4 [] ip_auto_config+0x1d4/0xf28 [] do_one_initcall+0x58/0x170 [] kernel_init+0x98/0x104 [] kernel_thread_helper+0x10/0x18 other info that might help us debug this: 2 locks held by swapper/1: #0: (rcu_read_lock){.+.+..}, at: [] dev_queue_xmit+0x1e0/0x4b0 #1: (_xmit_ETHER#2){+.-...}, at: [] __qdisc_run+0x118/0x30c the SOFTIRQ-irq-safe lock's dependencies: -> (_xmit_ETHER#2){+.-...} ops: 0 { HARDIRQ-ON-W at: [] __lock_acquire+0x7fc/0x1a14 [] lock_acquire+0xf8/0x150 [] _spin_lock+0x30/0x44 [] dev_watchdog+0x70/0x398 [] run_timer_softirq+0x1a8/0x248 [] __do_softirq+0xec/0x208 [] do_softirq+0x60/0xe4 [] irq_exit+0x54/0x9c [] ret_from_irq+0x0/0x4 [] r4k_wait+0x20/0x40 [] cpu_idle+0x30/0x60 [] start_kernel+0x3ec/0x404 IN-SOFTIRQ-W at: [] __lock_acquire+0x784/0x1a14 [] lock_acquire+0xf8/0x150 [] _spin_lock+0x30/0x44 [] dev_watchdog+0x70/0x398 [] run_timer_softirq+0x1a8/0x248 [] __do_softirq+0xec/0x208 [] do_softirq+0x60/0xe4 [] irq_exit+0x54/0x9c [] ret_from_irq+0x0/0x4 [] r4k_wait+0x20/0x40 [] cpu_idle+0x30/0x60 [] start_kernel+0x3ec/0x404 INITIAL USE at: [] __lock_acquire+0x89c/0x1a14 [] lock_acquire+0xf8/0x150 [] _spin_lock+0x30/0x44 [] dev_watchdog+0x70/0x398 [] run_timer_softirq+0x1a8/0x248 [] __do_softirq+0xec/0x208 [] do_softirq+0x60/0xe4 [] irq_exit+0x54/0x9c [] ret_from_irq+0x0/0x4 [] r4k_wait+0x20/0x40 [] cpu_idle+0x30/0x60 [] start_kernel+0x3ec/0x404 } ... key at: [] netdev_xmit_lock_key+0x8/0x1c8 the SOFTIRQ-irq-unsafe lock's dependencies: -> (&priv->meth_lock){+.+...} ops: 0 { HARDIRQ-ON-W at: [] __lock_acquire+0x7fc/0x1a14 [] lock_acquire+0xf8/0x150 [] _spin_lock+0x30/0x44 [] meth_reset+0x118/0x2d8 [] meth_open+0x28/0x140 [] dev_open+0xe0/0x18c [] dev_change_flags+0xd8/0x1d4 [] ip_auto_config+0x1d4/0xf28 [] do_one_initcall+0x58/0x170 [] kernel_init+0x98/0x104 [] kernel_thread_helper+0x10/0x18 SOFTIRQ-ON-W at: [] __lock_acquire+0x824/0x1a14 [] lock_acquire+0xf8/0x150 [] _spin_lock+0x30/0x44 [] meth_reset+0x118/0x2d8 [] meth_open+0x28/0x140 [] dev_open+0xe0/0x18c [] dev_change_flags+0xd8/0x1d4 [] ip_auto_config+0x1d4/0xf28 [] do_one_initcall+0x58/0x170 [] kernel_init+0x98/0x104 [] kernel_thread_helper+0x10/0x18 INITIAL USE at: [] __lock_acquire+0x89c/0x1a14 [] lock_acquire+0xf8/0x150 [] _spin_lock+0x30/0x44 [] meth_reset+0x118/0x2d8 [] meth_open+0x28/0x140 [] dev_open+0xe0/0x18c [] dev_change_flags+0xd8/0x1d4 [] ip_auto_config+0x1d4/0xf28 [] do_one_initcall+0x58/0x170 [] kernel_init+0x98/0x104 [] kernel_thread_helper+0x10/0x18 } ... key at: [] __key.32424+0x0/0x8 stack backtrace: Call Trace: [] dump_stack+0x8/0x34 [] check_usage+0x470/0x4a0 [] check_irq_usage+0x90/0x130 [] __lock_acquire+0x12a4/0x1a14 [] lock_acquire+0xf8/0x150 [] _spin_lock_irqsave+0x60/0x84 [] meth_tx+0x48/0x43c [] __qdisc_run+0x150/0x30c [] dev_queue_xmit+0x334/0x4b0 [] ip_auto_config+0x8d0/0xf28 [] do_one_initcall+0x58/0x170 [] kernel_init+0x98/0x104 [] kernel_thread_helper+0x10/0x18 ..... timed out! IP-Config: Retrying forever (NFS root)... Sending DHCP requests ., OK [...] Fixed by converting all locks to irq locks. Signed-off-by: Ralf Baechle Tested-by: Andrew Randrianasulu Signed-off-by: David S. Miller diff --git a/drivers/net/meth.c b/drivers/net/meth.c index aa08987..dbd3436 100644 --- a/drivers/net/meth.c +++ b/drivers/net/meth.c @@ -127,11 +127,11 @@ static unsigned long mdio_read(struct meth_private *priv, unsigned long phyreg) static int mdio_probe(struct meth_private *priv) { int i; - unsigned long p2, p3; + unsigned long p2, p3, flags; /* check if phy is detected already */ if(priv->phy_addr>=0&&priv->phy_addr<32) return 0; - spin_lock(&priv->meth_lock); + spin_lock_irqsave(&priv->meth_lock, flags); for (i=0;i<32;++i){ priv->phy_addr=i; p2=mdio_read(priv,2); @@ -157,7 +157,7 @@ static int mdio_probe(struct meth_private *priv) break; } } - spin_unlock(&priv->meth_lock); + spin_unlock_irqrestore(&priv->meth_lock, flags); if(priv->phy_addr<32) { return 0; } @@ -373,14 +373,14 @@ static int meth_release(struct net_device *dev) static void meth_rx(struct net_device* dev, unsigned long int_status) { struct sk_buff *skb; - unsigned long status; + unsigned long status, flags; struct meth_private *priv = netdev_priv(dev); unsigned long fifo_rptr = (int_status & METH_INT_RX_RPTR_MASK) >> 8; - spin_lock(&priv->meth_lock); + spin_lock_irqsave(&priv->meth_lock, flags); priv->dma_ctrl &= ~METH_DMA_RX_INT_EN; mace->eth.dma_ctrl = priv->dma_ctrl; - spin_unlock(&priv->meth_lock); + spin_unlock_irqrestore(&priv->meth_lock, flags); if (int_status & METH_INT_RX_UNDERFLOW) { fifo_rptr = (fifo_rptr - 1) & 0x0f; @@ -452,12 +452,12 @@ static void meth_rx(struct net_device* dev, unsigned long int_status) mace->eth.rx_fifo = priv->rx_ring_dmas[priv->rx_write]; ADVANCE_RX_PTR(priv->rx_write); } - spin_lock(&priv->meth_lock); + spin_lock_irqsave(&priv->meth_lock, flags); /* In case there was underflow, and Rx DMA was disabled */ priv->dma_ctrl |= METH_DMA_RX_INT_EN | METH_DMA_RX_EN; mace->eth.dma_ctrl = priv->dma_ctrl; mace->eth.int_stat = METH_INT_RX_THRESHOLD; - spin_unlock(&priv->meth_lock); + spin_unlock_irqrestore(&priv->meth_lock, flags); } static int meth_tx_full(struct net_device *dev) @@ -470,11 +470,11 @@ static int meth_tx_full(struct net_device *dev) static void meth_tx_cleanup(struct net_device* dev, unsigned long int_status) { struct meth_private *priv = netdev_priv(dev); - unsigned long status; + unsigned long status, flags; struct sk_buff *skb; unsigned long rptr = (int_status&TX_INFO_RPTR) >> 16; - spin_lock(&priv->meth_lock); + spin_lock_irqsave(&priv->meth_lock, flags); /* Stop DMA notification */ priv->dma_ctrl &= ~(METH_DMA_TX_INT_EN); @@ -527,12 +527,13 @@ static void meth_tx_cleanup(struct net_device* dev, unsigned long int_status) } mace->eth.int_stat = METH_INT_TX_EMPTY | METH_INT_TX_PKT; - spin_unlock(&priv->meth_lock); + spin_unlock_irqrestore(&priv->meth_lock, flags); } static void meth_error(struct net_device* dev, unsigned status) { struct meth_private *priv = netdev_priv(dev); + unsigned long flags; printk(KERN_WARNING "meth: error status: 0x%08x\n",status); /* check for errors too... */ @@ -547,7 +548,7 @@ static void meth_error(struct net_device* dev, unsigned status) printk(KERN_WARNING "meth: Rx overflow\n"); if (status & (METH_INT_RX_UNDERFLOW)) { printk(KERN_WARNING "meth: Rx underflow\n"); - spin_lock(&priv->meth_lock); + spin_lock_irqsave(&priv->meth_lock, flags); mace->eth.int_stat = METH_INT_RX_UNDERFLOW; /* more underflow interrupts will be delivered, * effectively throwing us into an infinite loop. @@ -555,7 +556,7 @@ static void meth_error(struct net_device* dev, unsigned status) priv->dma_ctrl &= ~METH_DMA_RX_EN; mace->eth.dma_ctrl = priv->dma_ctrl; DPRINTK("Disabled meth Rx DMA temporarily\n"); - spin_unlock(&priv->meth_lock); + spin_unlock_irqrestore(&priv->meth_lock, flags); } mace->eth.int_stat = METH_INT_ERROR; } -- cgit v0.10.2 From a598f6aebea2481531b0757ed90cfb0d8cf1d8f5 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 15 May 2009 06:10:13 +0000 Subject: bridge: relay bridge multicast pkgs if !STP Currently the bridge catches all STP packets; even if STP is turned off. This prevents other systems (which do have STP turned on) from being able to detect loops in the network. With this patch, if STP is off, then any packet sent to the STP multicast group address is forwarded to all ports. Based on earlier patch by Joakim Tjernlund with changes to go through forwarding (not local chain), and optimization that only last octet needs to be checked. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 30b8877..5ee1a36 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -134,6 +134,10 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_PAUSE)) goto drop; + /* If STP is turned off, then forward */ + if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) + goto forward; + if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, NULL, br_handle_local_finish)) return NULL; /* frame consumed by filter */ @@ -141,6 +145,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) return skb; /* continue processing */ } +forward: switch (p->state) { case BR_STATE_FORWARDING: rhook = rcu_dereference(br_should_route_hook); -- cgit v0.10.2 From 4f0611af47e25807cf18cd2b4d4e94206c75b29e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 15 May 2009 06:11:58 +0000 Subject: bridge: fix initial packet flood if !STP If bridge is configured with no STP and forwarding delay of 0 (which is typical for virtualization) then when link starts it will flood all packets for the first 20 seconds. This bug was introduced by a combination of earlier changes: * forwarding database uses hold time of zero to indicate user wants to always flood packets * optimzation of the case of forwarding delay of 0 avoids the initial timer tick The fix is to just skip all the topology change detection code if kernel STP is not being used. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 6e63ec3..0660515 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -297,6 +297,9 @@ void br_topology_change_detection(struct net_bridge *br) { int isroot = br_is_root_bridge(br); + if (br->stp_enabled != BR_KERNEL_STP) + return; + pr_info("%s: topology change detected, %s\n", br->dev->name, isroot ? "propagating" : "sending tcn bpdu"); -- cgit v0.10.2 From 4cd6fe1c6483cde93e2ec91f58b7af9c9eea51ad Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 15 May 2009 08:44:32 +0000 Subject: bonding: fix link down handling in 802.3ad mode One of the purposes of bonding is to allow for redundant links, and failover correctly if the cable is pulled. If all the members of a bonded device have no carrier present, the bonded device itself needs to report no carrier present to user space so management tools (like routing daemons) can respond. Bonding in 802.3ad mode does not work correctly for this because it incorrectly chooses a link that is down as a possible aggregator. Signed-off-by: Stephen Hemminger Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 8c2e5ab..faf094a 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1465,6 +1465,12 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best, return best; } +static int agg_device_up(const struct aggregator *agg) +{ + return (netif_running(agg->slave->dev) && + netif_carrier_ok(agg->slave->dev)); +} + /** * ad_agg_selection_logic - select an aggregation group for a team * @aggregator: the aggregator we're looking at @@ -1496,14 +1502,13 @@ static void ad_agg_selection_logic(struct aggregator *agg) struct port *port; origin = agg; - active = __get_active_agg(agg); - best = active; + best = (active && agg_device_up(active)) ? active : NULL; do { agg->is_active = 0; - if (agg->num_of_ports) + if (agg->num_of_ports && agg_device_up(agg)) best = ad_agg_selection_test(best, agg); } while ((agg = __get_next_agg(agg))); -- cgit v0.10.2 From eb0519b5a1cf07d5a1b919cffa0304cdaaa97a7e Mon Sep 17 00:00:00 2001 From: Gabriel Paubert Date: Sun, 17 May 2009 21:16:47 -0700 Subject: mv643xx_eth: fix PPC DMA breakage After 2.6.29, PPC no more admits passing NULL to the dev parameter of the DMA API. The result is a BUG followed by solid lock-up when the mv643xx_eth driver brings an interface up. The following patch makes the driver work on my Pegasos again; it is mostly a search and replace of NULL by mp->dev->dev.parent in dma allocation/freeing/mapping/unmapping functions. Signed-off-by: Gabriel Paubert Acked-by: Lennert Buytenhek Signed-off-by: David S. Miller diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index a400d71..6bb5af3 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -569,7 +569,7 @@ static int rxq_process(struct rx_queue *rxq, int budget) if (rxq->rx_curr_desc == rxq->rx_ring_size) rxq->rx_curr_desc = 0; - dma_unmap_single(NULL, rx_desc->buf_ptr, + dma_unmap_single(mp->dev->dev.parent, rx_desc->buf_ptr, rx_desc->buf_size, DMA_FROM_DEVICE); rxq->rx_desc_count--; rx++; @@ -678,8 +678,9 @@ static int rxq_refill(struct rx_queue *rxq, int budget) rx_desc = rxq->rx_desc_area + rx; - rx_desc->buf_ptr = dma_map_single(NULL, skb->data, - mp->skb_size, DMA_FROM_DEVICE); + rx_desc->buf_ptr = dma_map_single(mp->dev->dev.parent, + skb->data, mp->skb_size, + DMA_FROM_DEVICE); rx_desc->buf_size = mp->skb_size; rxq->rx_skb[rx] = skb; wmb(); @@ -718,6 +719,7 @@ static inline unsigned int has_tiny_unaligned_frags(struct sk_buff *skb) static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb) { + struct mv643xx_eth_private *mp = txq_to_mp(txq); int nr_frags = skb_shinfo(skb)->nr_frags; int frag; @@ -746,10 +748,10 @@ static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb) desc->l4i_chk = 0; desc->byte_cnt = this_frag->size; - desc->buf_ptr = dma_map_page(NULL, this_frag->page, - this_frag->page_offset, - this_frag->size, - DMA_TO_DEVICE); + desc->buf_ptr = dma_map_page(mp->dev->dev.parent, + this_frag->page, + this_frag->page_offset, + this_frag->size, DMA_TO_DEVICE); } } @@ -826,7 +828,8 @@ no_csum: desc->l4i_chk = l4i_chk; desc->byte_cnt = length; - desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE); + desc->buf_ptr = dma_map_single(mp->dev->dev.parent, skb->data, + length, DMA_TO_DEVICE); __skb_queue_tail(&txq->tx_skb, skb); @@ -956,10 +959,10 @@ static int txq_reclaim(struct tx_queue *txq, int budget, int force) } if (cmd_sts & TX_FIRST_DESC) { - dma_unmap_single(NULL, desc->buf_ptr, + dma_unmap_single(mp->dev->dev.parent, desc->buf_ptr, desc->byte_cnt, DMA_TO_DEVICE); } else { - dma_unmap_page(NULL, desc->buf_ptr, + dma_unmap_page(mp->dev->dev.parent, desc->buf_ptr, desc->byte_cnt, DMA_TO_DEVICE); } @@ -1894,9 +1897,9 @@ static int rxq_init(struct mv643xx_eth_private *mp, int index) mp->rx_desc_sram_size); rxq->rx_desc_dma = mp->rx_desc_sram_addr; } else { - rxq->rx_desc_area = dma_alloc_coherent(NULL, size, - &rxq->rx_desc_dma, - GFP_KERNEL); + rxq->rx_desc_area = dma_alloc_coherent(mp->dev->dev.parent, + size, &rxq->rx_desc_dma, + GFP_KERNEL); } if (rxq->rx_desc_area == NULL) { @@ -1947,7 +1950,7 @@ out_free: if (index == 0 && size <= mp->rx_desc_sram_size) iounmap(rxq->rx_desc_area); else - dma_free_coherent(NULL, size, + dma_free_coherent(mp->dev->dev.parent, size, rxq->rx_desc_area, rxq->rx_desc_dma); @@ -1979,7 +1982,7 @@ static void rxq_deinit(struct rx_queue *rxq) rxq->rx_desc_area_size <= mp->rx_desc_sram_size) iounmap(rxq->rx_desc_area); else - dma_free_coherent(NULL, rxq->rx_desc_area_size, + dma_free_coherent(mp->dev->dev.parent, rxq->rx_desc_area_size, rxq->rx_desc_area, rxq->rx_desc_dma); kfree(rxq->rx_skb); @@ -2007,9 +2010,9 @@ static int txq_init(struct mv643xx_eth_private *mp, int index) mp->tx_desc_sram_size); txq->tx_desc_dma = mp->tx_desc_sram_addr; } else { - txq->tx_desc_area = dma_alloc_coherent(NULL, size, - &txq->tx_desc_dma, - GFP_KERNEL); + txq->tx_desc_area = dma_alloc_coherent(mp->dev->dev.parent, + size, &txq->tx_desc_dma, + GFP_KERNEL); } if (txq->tx_desc_area == NULL) { @@ -2053,7 +2056,7 @@ static void txq_deinit(struct tx_queue *txq) txq->tx_desc_area_size <= mp->tx_desc_sram_size) iounmap(txq->tx_desc_area); else - dma_free_coherent(NULL, txq->tx_desc_area_size, + dma_free_coherent(mp->dev->dev.parent, txq->tx_desc_area_size, txq->tx_desc_area, txq->tx_desc_dma); } -- cgit v0.10.2 From d77dd8d27e73a9277096453eb901aae0bfd4ca3c Mon Sep 17 00:00:00 2001 From: roel kluin Date: Fri, 15 May 2009 10:19:51 +0000 Subject: Neterion: *FIFO1_DMA_ERR set twice, should 2nd be *FIFO2_DMA_ERR? FIFO1_DMA_ERR is set twice, the second should be FIFO2_DMA_ERR. Signed-off-by: Roel Kluin Acked-by: Ram Vepa Signed-off-by: David S. Miller diff --git a/drivers/net/vxge/vxge-traffic.c b/drivers/net/vxge/vxge-traffic.c index 7be0ae10..c2eeac4 100644 --- a/drivers/net/vxge/vxge-traffic.c +++ b/drivers/net/vxge/vxge-traffic.c @@ -115,7 +115,7 @@ enum vxge_hw_status vxge_hw_vpath_intr_enable(struct __vxge_hw_vpath_handle *vp) VXGE_HW_KDFCCTL_ERRORS_REG_KDFCCTL_FIFO1_POISON| VXGE_HW_KDFCCTL_ERRORS_REG_KDFCCTL_FIFO2_POISON| VXGE_HW_KDFCCTL_ERRORS_REG_KDFCCTL_FIFO1_DMA_ERR| - VXGE_HW_KDFCCTL_ERRORS_REG_KDFCCTL_FIFO1_DMA_ERR), 0, 32), + VXGE_HW_KDFCCTL_ERRORS_REG_KDFCCTL_FIFO2_DMA_ERR), 0, 32), &vp_reg->kdfcctl_errors_mask); __vxge_hw_pio_mem_write32_upper(0, &vp_reg->vpath_ppif_int_mask); -- cgit v0.10.2 From 705efc3b03cbee449e4d83b230423894152f7982 Mon Sep 17 00:00:00 2001 From: Wang Tinggong Date: Thu, 14 May 2009 22:49:36 +0000 Subject: Doc: fixed descriptions on /proc/sys/net/core/* and /proc/sys/net/unix/* Signed-off-by: Wang Tinggong Signed-off-by: David S. Miller diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ec5de02..b121c5d 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1266,13 +1266,22 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max sctp_wmem - vector of 3 INTEGERs: min, default, max See tcp_wmem for a description. -UNDOCUMENTED: /proc/sys/net/core/* - dev_weight FIXME +dev_weight - INTEGER + The maximum number of packets that kernel can handle on a NAPI + interrupt, it's a Per-CPU variable. + + Default: 64 /proc/sys/net/unix/* - max_dgram_qlen FIXME +max_dgram_qlen - INTEGER + The maximum length of dgram socket receive queue + + Default: 10 + + +UNDOCUMENTED: /proc/sys/net/irda/* fast_poll_increase FIXME -- cgit v0.10.2 From 775273131810caa41dfc7f9e552ea5d8508caf40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sun, 10 May 2009 20:32:34 +0000 Subject: tcp: fix MSG_PEEK race check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 518a09ef11 (tcp: Fix recvmsg MSG_PEEK influence of blocking behavior) lets the loop run longer than the race check did previously expect, so we need to be more careful with this check and consider the work we have been doing. I tried my best to deal with urg hole madness too which happens here: if (!sock_flag(sk, SOCK_URGINLINE)) { ++*seq; ... by using additional offset by one but I certainly have very little interest in testing that part. Signed-off-by: Ilpo Järvinen Tested-by: Frans Pop Tested-by: Ian Zimmermann Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1d7f49c..7a0f0b2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1321,6 +1321,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct task_struct *user_recv = NULL; int copied_early = 0; struct sk_buff *skb; + u32 urg_hole = 0; lock_sock(sk); @@ -1532,7 +1533,8 @@ do_prequeue: } } } - if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) { + if ((flags & MSG_PEEK) && + (peek_seq - copied - urg_hole != tp->copied_seq)) { if (net_ratelimit()) printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", current->comm, task_pid_nr(current)); @@ -1553,6 +1555,7 @@ do_prequeue: if (!urg_offset) { if (!sock_flag(sk, SOCK_URGINLINE)) { ++*seq; + urg_hole++; offset++; used--; if (!used) -- cgit v0.10.2 From c0f84d0d4be3f7d818b4ffb04d27f9bae64397f0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 May 2009 15:12:31 -0700 Subject: sch_teql: should not dereference skb after ndo_start_xmit() It is illegal to dereference a skb after a successful ndo_start_xmit() call. We must store skb length in a local variable instead. Bug was introduced in 2.6.27 by commit 0abf77e55a2459aa9905be4b226e4729d5b4f0cb (net_sched: Add accessor function for packet length for qdiscs) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index ec697ce..3b64182 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -303,6 +303,8 @@ restart: switch (teql_resolve(skb, skb_res, slave)) { case 0: if (__netif_tx_trylock(slave_txq)) { + unsigned int length = qdisc_pkt_len(skb); + if (!netif_tx_queue_stopped(slave_txq) && !netif_tx_queue_frozen(slave_txq) && slave_ops->ndo_start_xmit(skb, slave) == 0) { @@ -310,8 +312,7 @@ restart: master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); master->stats.tx_packets++; - master->stats.tx_bytes += - qdisc_pkt_len(skb); + master->stats.tx_bytes += length; return 0; } __netif_tx_unlock(slave_txq); -- cgit v0.10.2 From c4ca2374312b4de819dd700e72a68395eddb5fcb Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Mon, 18 May 2009 15:38:55 -0700 Subject: be2net: add two new pci device ids to pci device table Signed-off-by: Ajit Khaparde Signed-off-by: David S. Miller diff --git a/drivers/net/benet/be.h b/drivers/net/benet/be.h index c49ddd0..b4bb06f 100644 --- a/drivers/net/benet/be.h +++ b/drivers/net/benet/be.h @@ -35,8 +35,22 @@ #define DRV_VER "2.0.348" #define DRV_NAME "be2net" #define BE_NAME "ServerEngines BladeEngine2 10Gbps NIC" +#define OC_NAME "Emulex OneConnect 10Gbps NIC" #define DRV_DESC BE_NAME "Driver" +#define BE_VENDOR_ID 0x19a2 +#define BE_DEVICE_ID1 0x211 +#define OC_DEVICE_ID1 0x700 +#define OC_DEVICE_ID2 0x701 + +static inline char *nic_name(struct pci_dev *pdev) +{ + if (pdev->device == OC_DEVICE_ID1 || pdev->device == OC_DEVICE_ID2) + return OC_NAME; + else + return BE_NAME; +} + /* Number of bytes of an RX frame that are copied to skb->data */ #define BE_HDR_LEN 64 #define BE_MAX_JUMBO_FRAME_SIZE 9018 diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c index 30d0c81..5c378b5 100644 --- a/drivers/net/benet/be_main.c +++ b/drivers/net/benet/be_main.c @@ -28,10 +28,10 @@ static unsigned int rx_frag_size = 2048; module_param(rx_frag_size, uint, S_IRUGO); MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data."); -#define BE_VENDOR_ID 0x19a2 -#define BE2_DEVICE_ID_1 0x0211 static DEFINE_PCI_DEVICE_TABLE(be_dev_ids) = { - { PCI_DEVICE(BE_VENDOR_ID, BE2_DEVICE_ID_1) }, + { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) }, + { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) }, + { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) }, { 0 } }; MODULE_DEVICE_TABLE(pci, be_dev_ids); @@ -1859,7 +1859,7 @@ static int __devinit be_probe(struct pci_dev *pdev, if (status != 0) goto stats_clean; - dev_info(&pdev->dev, BE_NAME " port %d\n", adapter->port_num); + dev_info(&pdev->dev, "%s port %d\n", nic_name(pdev), adapter->port_num); return 0; stats_clean: @@ -1873,7 +1873,7 @@ rel_reg: disable_dev: pci_disable_device(pdev); do_none: - dev_warn(&pdev->dev, BE_NAME " initialization failed\n"); + dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev)); return status; } -- cgit v0.10.2 From 511e11e396dc596825ce04d53d7f6d579404bc01 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 May 2009 19:26:37 -0700 Subject: pkt_sched: gen_estimator: use 64 bit intermediate counters for bps gen_estimator can overflow bps (bytes per second) with Gb links, while it was designed with a u32 API, with a theorical limit of 34360Mbit (2^32 bytes) Using 64 bit intermediate avbps/brate counters can allow us to reach this theorical limit. Signed-off-by: Eric Dumazet Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 9cc9f95..6d62d46 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -66,9 +66,9 @@ NOTES. - * The stored value for avbps is scaled by 2^5, so that maximal - rate is ~1Gbit, avpps is scaled by 2^10. - + * avbps is scaled by 2^5, avpps is scaled by 2^10. + * both values are reported as 32 bit unsigned values. bps can + overflow for fast links : max speed being 34360Mbit/sec * Minimal interval is HZ/4=250msec (it is the greatest common divisor for HZ=100 and HZ=1024 8)), maximal interval is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals @@ -86,9 +86,9 @@ struct gen_estimator spinlock_t *stats_lock; int ewma_log; u64 last_bytes; + u64 avbps; u32 last_packets; u32 avpps; - u32 avbps; struct rcu_head e_rcu; struct rb_node node; }; @@ -115,6 +115,7 @@ static void est_timer(unsigned long arg) rcu_read_lock(); list_for_each_entry_rcu(e, &elist[idx].list, list) { u64 nbytes; + u64 brate; u32 npackets; u32 rate; @@ -125,9 +126,9 @@ static void est_timer(unsigned long arg) nbytes = e->bstats->bytes; npackets = e->bstats->packets; - rate = (nbytes - e->last_bytes)<<(7 - idx); + brate = (nbytes - e->last_bytes)<<(7 - idx); e->last_bytes = nbytes; - e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log; + e->avbps += ((s64)(brate - e->avbps)) >> e->ewma_log; e->rate_est->bps = (e->avbps+0xF)>>5; rate = (npackets - e->last_packets)<<(12 - idx); -- cgit v0.10.2 From 995b337952cdf7e05d288eede580257b632a8343 Mon Sep 17 00:00:00 2001 From: Thomas Chenault Date: Mon, 18 May 2009 21:43:27 -0700 Subject: net: fix skb_seq_read returning wrong offset/length for page frag data When called with a consumed value that is less than skb_headlen(skb) bytes into a page frag, skb_seq_read() incorrectly returns an offset/length relative to skb->data. Ensure that data which should come from a page frag does. Signed-off-by: Thomas Chenault Tested-by: Shyam Iyer Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d152394..e505b53 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2288,7 +2288,7 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, next_skb: block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; - if (abs_offset < block_limit) { + if (abs_offset < block_limit && !st->frag_data) { *data = st->cur_skb->data + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } -- cgit v0.10.2 From bc8a5397433e4effbaddfa7e462d10b3c060cabb Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Mon, 18 May 2009 21:48:38 -0700 Subject: ipv4: make default for INET_LRO consistent with help text Commit e81963b1 ("ipv4: Make INET_LRO a bool instead of tristate.") changed this config from tristate to bool. Add default so that it is consistent with the help text. Signed-off-by: Frans Pop Signed-off-by: David S. Miller diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 9d26a3d..5b919f7 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -408,7 +408,7 @@ config INET_XFRM_MODE_BEET config INET_LRO bool "Large Receive Offload (ipv4/tcp)" - + default y ---help--- Support for Large Receive Offload (ipv4/tcp). -- cgit v0.10.2