From efba01803c8570bab11d0d6188a630231d0ddccf Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 3 Dec 2008 00:36:15 -0800 Subject: bnx2: Add workaround to handle missed MSI. The bnx2 chips do not support per MSI vector masking. On 5706/5708, new MSI address/data are stored only when the MSI enable bit is toggled. As a result, SMP affinity no longer works in the latest kernel. A more serious problem is that the driver will no longer receive interrupts when the MSI receiving CPU goes offline. The workaround in this patch only addresses the problem of CPU going offline. When that happens, the driver's timer function will detect that it is making no forward progress on pending interrupt events and will recover from it. Eric Dumazet reported the problem. We also found that if an interrupt is internally asserted while MSI and INTA are disabled, the chip will end up in the same state after MSI is re-enabled. The same workaround is needed for this problem. Signed-off-by: Michael Chan Tested-by: Eric Dumazet Signed-off-by: David S. 
Miller diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index d07e3f1..a1a3d0e 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -3144,6 +3144,28 @@ bnx2_has_work(struct bnx2_napi *bnapi) return 0; } +static void +bnx2_chk_missed_msi(struct bnx2 *bp) +{ + struct bnx2_napi *bnapi = &bp->bnx2_napi[0]; + u32 msi_ctrl; + + if (bnx2_has_work(bnapi)) { + msi_ctrl = REG_RD(bp, BNX2_PCICFG_MSI_CONTROL); + if (!(msi_ctrl & BNX2_PCICFG_MSI_CONTROL_ENABLE)) + return; + + if (bnapi->last_status_idx == bp->idle_chk_status_idx) { + REG_WR(bp, BNX2_PCICFG_MSI_CONTROL, msi_ctrl & + ~BNX2_PCICFG_MSI_CONTROL_ENABLE); + REG_WR(bp, BNX2_PCICFG_MSI_CONTROL, msi_ctrl); + bnx2_msi(bp->irq_tbl[0].vector, bnapi); + } + } + + bp->idle_chk_status_idx = bnapi->last_status_idx; +} + static void bnx2_poll_link(struct bnx2 *bp, struct bnx2_napi *bnapi) { struct status_block *sblk = bnapi->status_blk.msi; @@ -3218,14 +3240,15 @@ static int bnx2_poll(struct napi_struct *napi, int budget) work_done = bnx2_poll_work(bp, bnapi, work_done, budget); - if (unlikely(work_done >= budget)) - break; - /* bnapi->last_status_idx is used below to tell the hw how * much work has been processed, so we must read it before * checking for more work. */ bnapi->last_status_idx = sblk->status_idx; + + if (unlikely(work_done >= budget)) + break; + rmb(); if (likely(!bnx2_has_work(bnapi))) { netif_rx_complete(bp->dev, napi); @@ -4570,6 +4593,8 @@ bnx2_init_chip(struct bnx2 *bp) for (i = 0; i < BNX2_MAX_MSIX_VEC; i++) bp->bnx2_napi[i].last_status_idx = 0; + bp->idle_chk_status_idx = 0xffff; + bp->rx_mode = BNX2_EMAC_RX_MODE_SORT_MODE; /* Set up how to generate a link change interrupt. 
*/ @@ -5718,6 +5743,10 @@ bnx2_timer(unsigned long data) if (atomic_read(&bp->intr_sem) != 0) goto bnx2_restart_timer; + if ((bp->flags & (BNX2_FLAG_USING_MSI | BNX2_FLAG_ONE_SHOT_MSI)) == + BNX2_FLAG_USING_MSI) + bnx2_chk_missed_msi(bp); + bnx2_send_heart_beat(bp); bp->stats_blk->stat_FwRxDrop = diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h index 617d953..0b032c3 100644 --- a/drivers/net/bnx2.h +++ b/drivers/net/bnx2.h @@ -378,6 +378,9 @@ struct l2_fhdr { * pci_config_l definition * offset: 0000 */ +#define BNX2_PCICFG_MSI_CONTROL 0x00000058 +#define BNX2_PCICFG_MSI_CONTROL_ENABLE (1L<<16) + #define BNX2_PCICFG_MISC_CONFIG 0x00000068 #define BNX2_PCICFG_MISC_CONFIG_TARGET_BYTE_SWAP (1L<<2) #define BNX2_PCICFG_MISC_CONFIG_TARGET_MB_WORD_SWAP (1L<<3) @@ -6863,6 +6866,9 @@ struct bnx2 { u8 num_tx_rings; u8 num_rx_rings; + + u32 idle_chk_status_idx; + }; #define REG_RD(bp, offset) \ -- cgit v0.10.2 From d25830e5507f6bc815f5dd7e2eb65f172e878a2b Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 3 Dec 2008 00:37:04 -0800 Subject: netlabel: Fix a potential NULL pointer dereference Fix a potential NULL pointer dereference seen when trying to remove a static label configuration with an invalid address/mask combination. Signed-off-by: Paul Moore Signed-off-by: David S. 
Miller diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index e8a5c32..90c8506 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -574,9 +574,10 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, list_entry = netlbl_af4list_remove(addr->s_addr, mask->s_addr, &iface->addr4_list); spin_unlock(&netlbl_unlhsh_lock); - if (list_entry == NULL) + if (list_entry != NULL) + entry = netlbl_unlhsh_addr4_entry(list_entry); + else ret_val = -ENOENT; - entry = netlbl_unlhsh_addr4_entry(list_entry); audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); @@ -634,9 +635,10 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, spin_lock(&netlbl_unlhsh_lock); list_entry = netlbl_af6list_remove(addr, mask, &iface->addr6_list); spin_unlock(&netlbl_unlhsh_lock); - if (list_entry == NULL) + if (list_entry != NULL) + entry = netlbl_unlhsh_addr6_entry(list_entry); + else ret_val = -ENOENT; - entry = netlbl_unlhsh_addr6_entry(list_entry); audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); -- cgit v0.10.2 From bd7df219202f44e71e2e975a0fb5f76f946c1aef Mon Sep 17 00:00:00 2001 From: "remi.denis-courmont@nokia" Date: Mon, 1 Dec 2008 02:37:20 +0000 Subject: Phonet: do not dump addresses from other namespaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index b1770d6..242fe8f 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -123,6 +123,7 @@ nla_put_failure: static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct phonet_device *pnd; int dev_idx = 0, dev_start_idx = cb->args[0]; int addr_idx = 0, addr_start_idx = cb->args[1]; @@ -131,6 +132,8 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry(pnd, &pndevs.list, list) { u8 addr; + if (!net_eq(dev_net(pnd->netdev), net)) + continue; if (dev_idx > dev_start_idx) addr_start_idx = 0; if (dev_idx++ < dev_start_idx) -- cgit v0.10.2 From d253eee20195b25e298bf162a6e72f14bf4803e5 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 3 Dec 2008 15:52:35 -0800 Subject: can: Fix CAN_(EFF|RTR)_FLAG handling in can_filter Due to a wrong safety check in af_can.c it was not possible to filter for SFF frames with a specific CAN identifier without getting the same selected CAN identifier from a received EFF frame also. This fix has a minimum (but user visible) impact on the CAN filter API and therefore the CAN version is set to a new date. Indeed the 'old' API is still working as-is. But when now setting CAN_(EFF|RTR)_FLAG in can_filter.can_mask you might get less traffic than before - but still the stuff that you expected to get for your defined filter ... Thanks to Kurt Van Dijck for pointing at this issue and for the review. Signed-off-by: Oliver Hartkopp Acked-by: Kurt Van Dijck Signed-off-by: David S. 
Miller diff --git a/include/linux/can/core.h b/include/linux/can/core.h index e9ca210..f50785a 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -19,7 +19,7 @@ #include #include -#define CAN_VERSION "20071116" +#define CAN_VERSION "20081130" /* increment this number each time you change some user-space interface */ #define CAN_ABI_VERSION "8" diff --git a/net/can/af_can.c b/net/can/af_can.c index 7d4d2b3..d8173e5 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -319,23 +319,52 @@ static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev) return n ? d : NULL; } +/** + * find_rcv_list - determine optimal filterlist inside device filter struct + * @can_id: pointer to CAN identifier of a given can_filter + * @mask: pointer to CAN mask of a given can_filter + * @d: pointer to the device filter struct + * + * Description: + * Returns the optimal filterlist to reduce the filter handling in the + * receive path. This function is called by service functions that need + * to register or unregister a can_filter in the filter lists. + * + * A filter matches in general, when + * + * & mask == can_id & mask + * + * so every bit set in the mask (even CAN_EFF_FLAG, CAN_RTR_FLAG) describe + * relevant bits for the filter. + * + * The filter can be inverted (CAN_INV_FILTER bit set in can_id) or it can + * filter for error frames (CAN_ERR_FLAG bit set in mask). For error frames + * there is a special filterlist and a special rx path filter handling. + * + * Return: + * Pointer to optimal filterlist for the given can_id/mask pair. + * Constistency checked mask. + * Reduced can_id to have a preprocessed filter compare value. 
+ */ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, struct dev_rcv_lists *d) { canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */ - /* filter error frames */ + /* filter for error frames in extra filterlist */ if (*mask & CAN_ERR_FLAG) { - /* clear CAN_ERR_FLAG in list entry */ + /* clear CAN_ERR_FLAG in filter entry */ *mask &= CAN_ERR_MASK; return &d->rx[RX_ERR]; } - /* ensure valid values in can_mask */ - if (*mask & CAN_EFF_FLAG) - *mask &= (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG); - else - *mask &= (CAN_SFF_MASK | CAN_RTR_FLAG); + /* with cleared CAN_ERR_FLAG we have a simple mask/value filterpair */ + +#define CAN_EFF_RTR_FLAGS (CAN_EFF_FLAG | CAN_RTR_FLAG) + + /* ensure valid values in can_mask for 'SFF only' frame filtering */ + if ((*mask & CAN_EFF_FLAG) && !(*can_id & CAN_EFF_FLAG)) + *mask &= (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS); /* reduce condition testing at receive time */ *can_id &= *mask; @@ -348,15 +377,19 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, if (!(*mask)) return &d->rx[RX_ALL]; - /* use extra filterset for the subscription of exactly *ONE* can_id */ - if (*can_id & CAN_EFF_FLAG) { - if (*mask == (CAN_EFF_MASK | CAN_EFF_FLAG)) { - /* RFC: a use-case for hash-tables in the future? */ - return &d->rx[RX_EFF]; + /* extra filterlists for the subscription of a single non-RTR can_id */ + if (((*mask & CAN_EFF_RTR_FLAGS) == CAN_EFF_RTR_FLAGS) + && !(*can_id & CAN_RTR_FLAG)) { + + if (*can_id & CAN_EFF_FLAG) { + if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS)) { + /* RFC: a future use-case for hash-tables? 
*/ + return &d->rx[RX_EFF]; + } + } else { + if (*mask == (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS)) + return &d->rx_sff[*can_id]; } - } else { - if (*mask == CAN_SFF_MASK) - return &d->rx_sff[*can_id]; } /* default: filter via can_id/can_mask */ diff --git a/net/can/bcm.c b/net/can/bcm.c index d0dd382..da0d426 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -64,10 +64,11 @@ #define BCM_CAN_DLC_MASK 0x0F /* clean private flags in can_dlc by masking */ /* get best masking value for can_rx_register() for a given single can_id */ -#define REGMASK(id) ((id & CAN_RTR_FLAG) | ((id & CAN_EFF_FLAG) ? \ - (CAN_EFF_MASK | CAN_EFF_FLAG) : CAN_SFF_MASK)) +#define REGMASK(id) ((id & CAN_EFF_FLAG) ? \ + (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \ + (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG)) -#define CAN_BCM_VERSION "20080415" +#define CAN_BCM_VERSION CAN_VERSION static __initdata const char banner[] = KERN_INFO "can: broadcast manager protocol (rev " CAN_BCM_VERSION ")\n"; -- cgit v0.10.2 From 1c594c05a75770ab53a329fc4eb99c797a4bc7d7 Mon Sep 17 00:00:00 2001 From: Wilfried Klaebe Date: Wed, 3 Dec 2008 20:57:19 -0800 Subject: b1isa: fix b1isa_exit() to really remove registered capi controllers On "/etc/init.d/capiutils stop", this oops happened. The oops happens on reading /proc/capi/controllers because capi_ctrl->procinfo is called for the wrongly not unregistered controller, which points to b1isa_procinfo(), which was removed on module unload. b1isa_exit() did not call b1isa_remove() for its controllers because io[0] == 0 on module unload despite having been 0x340 on module load. Besides, just removing the controllers that where added on module load time and not those that were added later via b1isa_add_card() is wrong too - the place where all added cards are found is isa_dev[]. 
relevant dmesg lines: [ 0.000000] Linux version 2.6.27.4 (w@shubashi) (gcc version 4.3.2 (Debian 4.3.2-1) ) #3 Thu Oct 30 16:49:03 CET 2008 [ 67.403555] CAPI Subsystem Rev 1.1.2.8 [ 68.529154] capifs: Rev 1.1.2.3 [ 68.563292] capi20: Rev 1.1.2.7: started up with major 68 (middleware+capifs) [ 77.026936] b1: revision 1.1.2.2 [ 77.049992] b1isa: revision 1.1.2.3 [ 77.722655] kcapi: Controller [001]: b1isa-340 attached [ 77.722671] b1isa: AVM B1 ISA at i/o 0x340, irq 5, revision 255 [ 81.272669] b1isa-340: card 1 "B1" ready. [ 81.272683] b1isa-340: card 1 Protocol: DSS1 [ 81.272689] b1isa-340: card 1 Linetype: point to multipoint [ 81.272695] b1isa-340: B1-card (3.11-03) now active [ 81.272702] kcapi: card [001] "b1isa-340" ready. [ 153.721281] kcapi: card [001] down. [ 154.151889] BUG: unable to handle kernel paging request at e87af000 [ 154.152081] IP: [] [ 154.153292] *pde = 2655b067 *pte = 00000000 [ 154.153307] Oops: 0000 [#1] [ 154.153360] Modules linked in: rfcomm l2cap ppdev lp ipt_MASQUERADE tun capi capifs kernelcapi ac battery nfsd exportfs nfs lockd nfs_acl sunrpc sit tunnel4 bridge stp llc ipt_REJECT ipt_LOG xt_tcpudp xt_state iptable_filter iptable_mangle iptable_nat nf_nat nf_conntrack_ipv4 nf_conntrack ip_tables x_tables nls_utf8 isofs nls_base zlib_inflate loop ipv6 netconsole snd_via82xx dvb_usb_dib0700 gameport dib7000p dib7000m dvb_usb snd_ac97_codec ac97_bus dvb_core mt2266 snd_pcm tuner_xc2028 dib3000mc dibx000_common mt2060 dib0070 snd_page_alloc snd_mpu401_uart snd_seq_midi snd_seq_midi_event btusb snd_rawmidi bluetooth snd_seq snd_timer snd_seq_device snd via686a i2c_viapro soundcore i2c_core parport_pc parport button dm_mirror dm_log dm_snapshot floppy sg ohci1394 uhci_hcd ehci_hcd 8139too mii ieee1394 usbcore sr_mod cdrom sd_mod thermal processor fan [last unloaded: b1] [ 154.153360] [ 154.153360] Pid: 4132, comm: capiinit Not tainted (2.6.27.4 #3) [ 154.153360] EIP: 0060:[] EFLAGS: 00010286 CPU: 0 [ 154.153360] EIP is at 0xe87af000 [ 
154.153360] EAX: e6b9ccc8 EBX: e6b9ccc8 ECX: e87a0c67 EDX: e87af000 [ 154.153360] ESI: e142bbc0 EDI: e87a56e0 EBP: e0505f0c ESP: e0505ee4 [ 154.153360] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 [ 154.153360] Process capiinit (pid: 4132, ti=e0504000 task=d1196cf0 task.ti=e0504000) [ 154.153360] Stack: e879f650 00000246 e0505ef4 c01472eb e0505f0c 00000246 e7001780 fffffff4 [ 154.153360] fffffff4 e142bbc0 e0505f48 c01a56c6 00000400 b805e000 d102dc80 e142bbe0 [ 154.153360] 00000000 e87a56e0 00000246 e12617ac 00000000 00000000 e1261760 fffffffb [ 154.153360] Call Trace: [ 154.153360] [] ? controller_show+0x20/0x90 [kernelcapi] [ 154.153360] [] ? trace_hardirqs_on+0xb/0x10 [ 154.153360] [] ? seq_read+0x126/0x2f0 [ 154.153360] [] ? seq_read+0x0/0x2f0 [ 154.153360] [] ? proc_reg_read+0x5c/0x90 [ 154.153360] [] ? vfs_read+0x99/0x140 [ 154.153360] [] ? proc_reg_read+0x0/0x90 [ 154.153360] [] ? sys_read+0x3d/0x70 [ 154.153360] [] ? sysenter_do_call+0x12/0x35 [ 154.153360] ======================= [ 154.153360] Code: Bad EIP value. [ 154.153360] EIP: [] 0xe87af000 SS:ESP 0068:e0505ee4 [ 154.153360] ---[ end trace 23750b6c2862de94 ]--- Signed-off-by: Wilfried Klaebe Signed-off-by: Andrew Morton Acked-by: Karsten Keil Signed-off-by: David S. 
Miller diff --git a/drivers/isdn/hardware/avm/b1isa.c b/drivers/isdn/hardware/avm/b1isa.c index 1e288ee..6461a32 100644 --- a/drivers/isdn/hardware/avm/b1isa.c +++ b/drivers/isdn/hardware/avm/b1isa.c @@ -233,10 +233,8 @@ static void __exit b1isa_exit(void) int i; for (i = 0; i < MAX_CARDS; i++) { - if (!io[i]) - break; - - b1isa_remove(&isa_dev[i]); + if (isa_dev[i].resource[0].start) + b1isa_remove(&isa_dev[i]); } unregister_capi_driver(&capi_driver_b1isa); } -- cgit v0.10.2 From bd0914104c61a852baf469b2d807322e5d0459b4 Mon Sep 17 00:00:00 2001 From: Pascal Terjan Date: Mon, 1 Dec 2008 12:24:25 +0000 Subject: hysdn: fix writing outside the field on 64 bits ifa_local is assumed to be unsigned long which lead to writing the address at dev->dev_addr-2 instead of +2 noticed thanks to gcc: drivers/isdn/hysdn/hysdn_net.c: In function `net_open': drivers/isdn/hysdn/hysdn_net.c:91: warning: array subscript is below array bounds Signed-off-by: Pascal Terjan Signed-off-by: Andrew Morton Signed-off-by: David S. 
Miller diff --git a/drivers/isdn/hysdn/hysdn_net.c b/drivers/isdn/hysdn/hysdn_net.c index cfa8fa5..3f2a0a2 100644 --- a/drivers/isdn/hysdn/hysdn_net.c +++ b/drivers/isdn/hysdn/hysdn_net.c @@ -83,12 +83,12 @@ net_open(struct net_device *dev) /* Fill in the MAC-level header (if not already set) */ if (!card->mac_addr[0]) { - for (i = 0; i < ETH_ALEN - sizeof(unsigned long); i++) + for (i = 0; i < ETH_ALEN; i++) dev->dev_addr[i] = 0xfc; if ((in_dev = dev->ip_ptr) != NULL) { struct in_ifaddr *ifa = in_dev->ifa_list; if (ifa != NULL) - memcpy(dev->dev_addr + (ETH_ALEN - sizeof(unsigned long)), &ifa->ifa_local, sizeof(unsigned long)); + memcpy(dev->dev_addr + (ETH_ALEN - sizeof(ifa->ifa_local)), &ifa->ifa_local, sizeof(ifa->ifa_local)); } } else memcpy(dev->dev_addr, card->mac_addr, ETH_ALEN); -- cgit v0.10.2 From 5176da7e5318669220e4d2fa856223054a3efc9f Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 2 Dec 2008 05:07:01 +0000 Subject: enc28j60: Fix sporadic packet loss (corrected again) Packet data read from the RX buffer when the RSV is at the end of the RX buffer does not wrap around. This causes packet loss, as the actual data is never read. Fix this by calculating the right packet data location. Thanks to Shachar Shemesh for suggesting the fix. Signed-off-by: Baruch Siach Acked-by: Claudio Lanconelli Signed-off-by: David S. 
Miller diff --git a/drivers/net/enc28j60.c b/drivers/net/enc28j60.c index e1b441e..c414554 100644 --- a/drivers/net/enc28j60.c +++ b/drivers/net/enc28j60.c @@ -568,6 +568,17 @@ static u16 erxrdpt_workaround(u16 next_packet_ptr, u16 start, u16 end) return erxrdpt; } +/* + * Calculate wrap around when reading beyond the end of the RX buffer + */ +static u16 rx_packet_start(u16 ptr) +{ + if (ptr + RSV_SIZE > RXEND_INIT) + return (ptr + RSV_SIZE) - (RXEND_INIT - RXSTART_INIT + 1); + else + return ptr + RSV_SIZE; +} + static void nolock_rxfifo_init(struct enc28j60_net *priv, u16 start, u16 end) { u16 erxrdpt; @@ -938,8 +949,9 @@ static void enc28j60_hw_rx(struct net_device *ndev) skb->dev = ndev; skb_reserve(skb, NET_IP_ALIGN); /* copy the packet from the receive buffer */ - enc28j60_mem_read(priv, priv->next_pk_ptr + sizeof(rsv), - len, skb_put(skb, len)); + enc28j60_mem_read(priv, + rx_packet_start(priv->next_pk_ptr), + len, skb_put(skb, len)); if (netif_msg_pktdata(priv)) dump_packet(__func__, skb->len, skb->data); skb->protocol = eth_type_trans(skb, ndev); -- cgit v0.10.2 From f8269a495a1924f8b023532dd3e77423432db810 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 3 Dec 2008 21:24:48 -0800 Subject: tcp: make urg+gso work for real this time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I should have noticed this earlier... :-) The previous solution to URG+GSO/TSO will cause SACK block tcp_fragment to do zig-zig patterns, or even worse, a steep downward slope into packet counting because each skb pcount would be truncated to pcount of 2 and then the following fragments of the later portion would restore the window again. Basically this reverts "tcp: Do not use TSO/GSO when there is urgent data" (33cf71cee1). 
It also removes some unnecessary code from tcp_current_mss that didn't work as intended either (could be that something was changed down the road, or it might have been broken since the dawn of time) because it only works once urg is already written while this bug shows up starting from ~64k before the urg point. The retransmissions already are split to mss sized chunks, so only new data sending paths need splitting in case they have a segment otherwise suitable for gso/tso. The actual check can be improved to be more narrow but since this is late -rc already, I'll postpone thinking about the more fine-grained things. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 85b07eb..fe3b4bd 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -722,8 +722,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk) || - tcp_urg_mode(tcp_sk(sk))) { + if (skb->len <= mss_now || !sk_can_gso(sk)) { /* Avoid the costly divide in the normal * non-TSO case. */ @@ -1029,10 +1028,6 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) /* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. - * - * LARGESEND note: !tcp_urg_mode is overkill, only frames up to snd_up - * cannot be large. However, taking into account rare use of URG, this - * is not a big flaw. 
*/ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) { @@ -1047,7 +1042,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) mss_now = tp->mss_cache; - if (large_allowed && sk_can_gso(sk) && !tcp_urg_mode(tp)) + if (large_allowed && sk_can_gso(sk)) doing_tso = 1; if (dst) { @@ -1164,9 +1159,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, { int tso_segs = tcp_skb_pcount(skb); - if (!tso_segs || - (tso_segs > 1 && (tcp_skb_mss(skb) != mss_now || - tcp_urg_mode(tcp_sk(sk))))) { + if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) { tcp_set_skb_tso_segs(sk, skb, mss_now); tso_segs = tcp_skb_pcount(skb); } @@ -1519,6 +1512,10 @@ static int tcp_mtu_probe(struct sock *sk) * send_head. This happens as incoming acks open up the remote * window for us. * + * LARGESEND note: !tcp_urg_mode is overkill, only frames between + * snd_up-64k-mss .. snd_up cannot be large. However, taking into + * account rare use of URG, this is not a big flaw. + * * Returns 1, if no segments are in flight and we have queued segments, but * cannot send anything now because of SWS or another problem. 
*/ @@ -1570,7 +1567,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) } limit = mss_now; - if (tso_segs > 1) + if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, cwnd_quota); @@ -1619,6 +1616,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, */ void tcp_push_one(struct sock *sk, unsigned int mss_now) { + struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = tcp_send_head(sk); unsigned int tso_segs, cwnd_quota; @@ -1633,7 +1631,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) BUG_ON(!tso_segs); limit = mss_now; - if (tso_segs > 1) + if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, cwnd_quota); -- cgit v0.10.2 From 2cc002c4bbce4d918ab94b494d61c6991c907d5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 3 Dec 2008 22:18:59 -0800 Subject: netx-eth: initialize per device spinlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The spinlock used in the netx-eth driver was never properly initialized. This was noticed using CONFIG_DEBUG_SPINLOCK=y Signed-off-by: Uwe Kleine-König Acked-by: Sascha Hauer Signed-off-by: David S. 
Miller diff --git a/drivers/net/netx-eth.c b/drivers/net/netx-eth.c index b9bed82..b289a0a 100644 --- a/drivers/net/netx-eth.c +++ b/drivers/net/netx-eth.c @@ -401,6 +401,8 @@ static int netx_eth_drv_probe(struct platform_device *pdev) priv->xmac_base = priv->xc->xmac_base; priv->sram_base = priv->xc->sram_base; + spin_lock_init(&priv->lock); + ret = pfifo_request(PFIFO_MASK(priv->id)); if (ret) { printk("unable to request PFIFO\n"); -- cgit v0.10.2 From 17b24b3c97498935a2ef9777370b1151dfed3f6f Mon Sep 17 00:00:00 2001 From: Chas Williams Date: Thu, 4 Dec 2008 14:58:13 -0800 Subject: ATM: CVE-2008-5079: duplicate listen() on socket corrupts the vcc table As reported by Hugo Dias that it is possible to cause a local denial of service attack by calling the svc_listen function twice on the same socket and reading /proc/net/atm/*vc Signed-off-by: Chas Williams Signed-off-by: David S. Miller diff --git a/net/atm/svc.c b/net/atm/svc.c index de1e4f2..8fb54dc 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -293,7 +293,10 @@ static int svc_listen(struct socket *sock,int backlog) error = -EINVAL; goto out; } - vcc_insert_socket(sk); + if (test_bit(ATM_VF_LISTEN, &vcc->flags)) { + error = -EADDRINUSE; + goto out; + } set_bit(ATM_VF_WAITING, &vcc->flags); prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc,as_listen,NULL,NULL,&vcc->local); @@ -307,6 +310,7 @@ static int svc_listen(struct socket *sock,int backlog) goto out; } set_bit(ATM_VF_LISTEN,&vcc->flags); + vcc_insert_socket(sk); sk->sk_max_ack_backlog = backlog > 0 ? backlog : ATM_BACKLOG_DEFAULT; error = -sk->sk_err; out: -- cgit v0.10.2 From f706644d55f90e8306d87060168fef33804d6dd9 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 4 Dec 2008 15:01:08 -0800 Subject: can: omit received RTR frames for single ID filter lists Since commit d253eee20195b25e298bf162a6e72f14bf4803e5 the single CAN identifier filter lists handle only non-RTR CAN frames. 
So we need to omit the check of these filter lists when receiving RTR CAN frames. Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller diff --git a/net/can/af_can.c b/net/can/af_can.c index d8173e5..3dadb33 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -622,7 +622,10 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) } } - /* check CAN_ID specific entries */ + /* check filterlists for single non-RTR can_ids */ + if (can_id & CAN_RTR_FLAG) + return matches; + if (can_id & CAN_EFF_FLAG) { hlist_for_each_entry_rcu(r, n, &d->rx[RX_EFF], list) { if (r->can_id == can_id) { -- cgit v0.10.2 From a6af2d6ba5797c556fba0cd3a19e5f3bc9a99b76 Mon Sep 17 00:00:00 2001 From: Doug Leith Date: Thu, 4 Dec 2008 17:17:18 -0800 Subject: tcp: tcp_vegas ssthresh bug fix This patch fixes a bug in tcp_vegas.c. At the moment this code leaves ssthresh untouched. However, this means that the vegas congestion control algorithm is effectively unable to reduce cwnd below the ssthresh value (if the vegas update lowers the cwnd below ssthresh, then slow start is activated to raise it back up). One example where this matters is when during slow start cwnd overshoots the link capacity and a flow then exits slow start with ssthresh set to a value above where congestion avoidance would like to adjust it. Signed-off-by: Doug Leith Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 14504da..7cd2226 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -326,6 +326,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) tp->snd_cwnd = 2; else if (tp->snd_cwnd > tp->snd_cwnd_clamp) tp->snd_cwnd = tp->snd_cwnd_clamp; + + tp->snd_ssthresh = tcp_current_ssthresh(sk); } /* Wipe the slate clean for the next RTT. 
*/ -- cgit v0.10.2 From 40a9a8299116297429298e8fcee08235134883f7 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 25 Nov 2008 23:29:03 +0200 Subject: iwlwifi: clean key table in iwl_clear_stations_table function This patch clears the uCode key table bit map in iwl_clear_stations_table: since all stations are cleared, the key table must be cleared as well. Since the keys are not removed properly on suspend by mac80211, this may result in exhausting the key table on resume, leading to memory corruption during removal. This patch also fixes a memory corruption problem reported in http://marc.info/?l=linux-wireless&m=122641417231586&w=2 and tracked in http://bugzilla.kernel.org/show_bug.cgi?id=12040. When the key is removed a second time the offset is set to 255 - this index is not valid for the ucode_key_table and corrupts the eeprom pointer (which is 255 bits from ucode_key_table). Signed-off-by: Tomas Winkler Signed-off-by: Zhu Yi Reported-by: Carlos R. Mafra Reported-by: Lukas Hejtmanek Signed-off-by: John W. 
Linville diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 4c312c5..01a8458 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -290,6 +290,9 @@ void iwl_clear_stations_table(struct iwl_priv *priv) priv->num_stations = 0; memset(priv->stations, 0, sizeof(priv->stations)); + /* clean ucode key table bit map */ + priv->ucode_key_table = 0; + spin_unlock_irqrestore(&priv->sta_lock, flags); } EXPORT_SYMBOL(iwl_clear_stations_table); diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c index 61797f3..26f7084 100644 --- a/drivers/net/wireless/iwlwifi/iwl-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-sta.c @@ -475,7 +475,7 @@ static int iwl_get_free_ucode_key_index(struct iwl_priv *priv) if (!test_and_set_bit(i, &priv->ucode_key_table)) return i; - return -1; + return WEP_INVALID_OFFSET; } int iwl_send_static_wepkey_cmd(struct iwl_priv *priv, u8 send_if_empty) @@ -620,6 +620,9 @@ static int iwl_set_wep_dynamic_key_info(struct iwl_priv *priv, /* else, we are overriding an existing key => no need to allocated room * in uCode. */ + WARN(priv->stations[sta_id].sta.key.key_offset == WEP_INVALID_OFFSET, + "no space for new kew"); + priv->stations[sta_id].sta.key.key_flags = key_flags; priv->stations[sta_id].sta.sta.modify_mask = STA_MODIFY_KEY_MASK; priv->stations[sta_id].sta.mode = STA_CONTROL_MODIFY_MSK; @@ -637,6 +640,7 @@ static int iwl_set_ccmp_dynamic_key_info(struct iwl_priv *priv, { unsigned long flags; __le16 key_flags = 0; + int ret; key_flags |= (STA_KEY_FLG_CCMP | STA_KEY_FLG_MAP_KEY_MSK); key_flags |= cpu_to_le16(keyconf->keyidx << STA_KEY_FLG_KEYID_POS); @@ -664,14 +668,18 @@ static int iwl_set_ccmp_dynamic_key_info(struct iwl_priv *priv, /* else, we are overriding an existing key => no need to allocated room * in uCode. 
*/ + WARN(priv->stations[sta_id].sta.key.key_offset == WEP_INVALID_OFFSET, + "no space for new kew"); + priv->stations[sta_id].sta.key.key_flags = key_flags; priv->stations[sta_id].sta.sta.modify_mask = STA_MODIFY_KEY_MASK; priv->stations[sta_id].sta.mode = STA_CONTROL_MODIFY_MSK; + ret = iwl_send_add_sta(priv, &priv->stations[sta_id].sta, CMD_ASYNC); + spin_unlock_irqrestore(&priv->sta_lock, flags); - IWL_DEBUG_INFO("hwcrypto: modify ucode station key info\n"); - return iwl_send_add_sta(priv, &priv->stations[sta_id].sta, CMD_ASYNC); + return ret; } static int iwl_set_tkip_dynamic_key_info(struct iwl_priv *priv, @@ -696,6 +704,9 @@ static int iwl_set_tkip_dynamic_key_info(struct iwl_priv *priv, /* else, we are overriding an existing key => no need to allocated room * in uCode. */ + WARN(priv->stations[sta_id].sta.key.key_offset == WEP_INVALID_OFFSET, + "no space for new kew"); + /* This copy is acutally not needed: we get the key with each TX */ memcpy(priv->stations[sta_id].keyinfo.key, keyconf->key, 16); @@ -734,6 +745,13 @@ int iwl_remove_dynamic_key(struct iwl_priv *priv, return 0; } + if (priv->stations[sta_id].sta.key.key_offset == WEP_INVALID_OFFSET) { + IWL_WARNING("Removing wrong key %d 0x%x\n", + keyconf->keyidx, key_flags); + spin_unlock_irqrestore(&priv->sta_lock, flags); + return 0; + } + if (!test_and_clear_bit(priv->stations[sta_id].sta.key.key_offset, &priv->ucode_key_table)) IWL_ERROR("index %d not used in uCode key table.\n", -- cgit v0.10.2 From b8ddafd759e356c839774a8b87011b768deb53b3 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 27 Nov 2008 13:42:20 +0800 Subject: ipw2200: fix netif_*_queue() removal regression In "ipw2200: Call netif_*_queue() interfaces properly", netif_stop_queue() and netif_wake_queue() were removed with the reason "netif_carrier_{on,off}() handles starting and stopping packet flow into the driver". 
The patch also removes a valid condition check that ipw_tx_skb() cannot be called if the device is not in STATUS_ASSOCIATED state. But netif_carrier_off() doesn't guarantee netdev->hard_start_xmit won't be called because the linkwatch event is handled in a delayed workqueue. This caused a kernel oops reported by Frank Seidel: https://bugzilla.novell.com/show_bug.cgi?id=397390 This patch fixes the problem by moving the STATUS_ASSOCIATED check back to ipw_tx_skb(). It also adds a missing netif_carrier_off() call to ipw_disassociate(). Signed-off-by: Zhu Yi Signed-off-by: Chatre, Reinette Tested-by: Frank Seidel Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c index dcce3542..7a9f901 100644 --- a/drivers/net/wireless/ipw2200.c +++ b/drivers/net/wireless/ipw2200.c @@ -3897,6 +3897,7 @@ static int ipw_disassociate(void *data) if (!(priv->status & (STATUS_ASSOCIATED | STATUS_ASSOCIATING))) return 0; ipw_send_disassociate(data, 0); + netif_carrier_off(priv->net_dev); return 1; } @@ -10190,6 +10191,9 @@ static int ipw_tx_skb(struct ipw_priv *priv, struct ieee80211_txb *txb, u16 remaining_bytes; int fc; + if (!(priv->status & STATUS_ASSOCIATED)) + goto drop; + hdr_len = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); switch (priv->ieee->iw_mode) { case IW_MODE_ADHOC: -- cgit v0.10.2 From 5cf12e8dc641ef028f0cf9c317a9567e6b794de1 Mon Sep 17 00:00:00 2001 From: Shaddy Baddah Date: Fri, 28 Nov 2008 17:08:10 +1100 Subject: mac80211: use unaligned safe memcmp() in-place of compare_ether_addr() After fixing zd1211rw: use unaligned safe memcmp() in-place of compare_ether_addr(), I started to see kernel log messages detailing unaligned access: Kernel unaligned access at TPC[100f7f44] sta_info_get+0x24/0x68 [mac80211] As with the aforementioned patch, the unaligned access was emanating from a compare_ether_addr() call.
I was concerned that, whilst it was safe to assume that misalignment was the norm for the zd1211rw and to take preventative measures there, the same might not hold here, and the easy fix of changing the call to memcmp() might not be acceptable. My research however indicated that it was OK to do this, as there are a few instances where memcmp() is the preferred mechanism for doing mac address comparisons throughout the module. Signed-off-by: Shaddy Baddah Signed-off-by: John W. Linville diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 7fef8ea..d254446 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -99,7 +99,7 @@ struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr) sta = rcu_dereference(local->sta_hash[STA_HASH(addr)]); while (sta) { - if (compare_ether_addr(sta->sta.addr, addr) == 0) + if (memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) break; sta = rcu_dereference(sta->hnext); } -- cgit v0.10.2 From cde6901b7b69557a6f90f3183f76eda581af015e Mon Sep 17 00:00:00 2001 From: Shaddy Baddah Date: Fri, 28 Nov 2008 17:10:45 +1100 Subject: zd1211rw: use unaligned safe memcmp() in-place of compare_ether_addr() Under my 2.6.28-rc6 sparc64, when associating to an AP through my zd1211rw device, I was seeing kernel log messages like (not exact output): Kernel unaligned access at TPC[10129b68] zd_mac_rx+0x144/0x32c [zd1211rw] For the zd1211rw module, on RX, the 80211 packet will be located after the PLCP header in the skb data buffer. The PLCP header being 5 bytes long, the 80211 header will start unaligned from an aligned skb buffer. As per Documentation/unaligned-memory-access.txt, we must replace compare_ether_addr(), which is not safe for unaligned access, with memcmp() to protect architectures that require alignment. Signed-off-by: Shaddy Baddah Signed-off-by: John W. 
Linville diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index fe1867b..cac732f 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -615,7 +615,7 @@ static int filter_ack(struct ieee80211_hw *hw, struct ieee80211_hdr *rx_hdr, struct ieee80211_hdr *tx_hdr; tx_hdr = (struct ieee80211_hdr *)skb->data; - if (likely(!compare_ether_addr(tx_hdr->addr2, rx_hdr->addr1))) + if (likely(!memcmp(tx_hdr->addr2, rx_hdr->addr1, ETH_ALEN))) { __skb_unlink(skb, q); tx_status(hw, skb, IEEE80211_TX_STAT_ACK, stats->signal, 1); -- cgit v0.10.2