From 93821778def10ec1e69aa3ac10adee975dad4ff3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Sep 2008 11:48:46 -0700 Subject: udp: Fix rcv socket locking The previous patch in response to the recursive locking on IPsec reception is broken as it tries to drop the BH socket lock while in user context. This patch fixes it by shrinking the section protected by the socket lock to sock_queue_rcv_skb only. The only reason we added the lock is for the accounting which happens in that function. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8e42fbb..57e26fa 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -951,6 +951,27 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } +static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int is_udplite = IS_UDPLITE(sk); + int rc; + + if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, + is_udplite); + goto drop; + } + + return 0; + +drop: + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + kfree_skb(skb); + return -1; +} + /* returns: * -1: error * 0: success @@ -989,9 +1010,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) up->encap_rcv != NULL) { int ret; - bh_unlock_sock(sk); ret = (*up->encap_rcv)(sk, skb); - bh_lock_sock(sk); if (ret <= 0) { UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INDATAGRAMS, @@ -1044,17 +1063,16 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) goto drop; } - if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { - /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) { - UDP_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); - atomic_inc(&sk->sk_drops); - } - goto drop; - } + rc = 0; - return 0; + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) + rc = __udp_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); + + return rc; drop: UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); @@ -1092,15 +1110,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) { - int ret = 0; - - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb1); - else - sk_add_backlog(sk, skb1); - bh_unlock_sock(sk); - + int ret = udp_queue_rcv_skb(sk, skb1); if (ret > 0) /* we should probably re-process instead * of dropping packets here. */ @@ -1195,13 +1205,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], uh->dest, inet_iif(skb), udptable); if (sk != NULL) { - int ret = 0; - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); + int ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but @@ -1494,7 +1498,7 @@ struct proto udp_prot = { .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, + .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, -- cgit v0.10.2 From a3028b8ed1e1e9930bfa70ce4555fb7f9fad3dcc Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 18 Sep 2008 02:48:25 -0700 Subject: sctp: set the skb->ip_summed correctly when sending over loopback. Loopback used to clobber the ip_summed filed which sctp then used to figure out if it needed to do checksumming or not. Now that loopback doesn't do that any more, sctp needs to set the ip_summed field correctly. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/net/sctp/output.c b/net/sctp/output.c index 0dc4a7d..225c712 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -533,7 +533,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) if (!(dst->dev->features & NETIF_F_NO_CSUM)) { crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); crc32 = sctp_end_cksum(crc32); - } + } else + nskb->ip_summed = CHECKSUM_UNNECESSARY; /* 3) Put the resultant value into the checksum field in the * common header, and leave the rest of the bits unchanged. -- cgit v0.10.2 From 0ef46e285c062cbe35d60c0adbff96f530d31c86 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 18 Sep 2008 16:27:38 -0700 Subject: sctp: do not enable peer features if we can't do them. Do not enable peer features like addip and auth, if they are administratively disabled localy. If the peer resports that he supports something that we don't, neither end can use it so enabling it is pointless. This solves a problem when talking to a peer that has auth and addip enabled while we do not. Found by Andrei Pelinescu-Onciul . Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index e8ca4e5..fe94f42 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1886,11 +1886,13 @@ static void sctp_process_ext_param(struct sctp_association *asoc, /* if the peer reports AUTH, assume that he * supports AUTH. */ - asoc->peer.auth_capable = 1; + if (sctp_auth_enable) + asoc->peer.auth_capable = 1; break; case SCTP_CID_ASCONF: case SCTP_CID_ASCONF_ACK: - asoc->peer.asconf_capable = 1; + if (sctp_addip_enable) + asoc->peer.asconf_capable = 1; break; default: break; @@ -2460,6 +2462,9 @@ do_addr_param: break; case SCTP_PARAM_SET_PRIMARY: + if (!sctp_addip_enable) + goto fall_through; + addr_param = param.v + sizeof(sctp_addip_param_t); af = sctp_get_af_specific(param_type2af(param.p->type)); -- cgit v0.10.2 From add52379dde2e5300e2d574b172e62c6cf43b3d3 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 18 Sep 2008 16:28:27 -0700 Subject: sctp: Fix oops when INIT-ACK indicates that peer doesn't support AUTH If INIT-ACK is received with SupportedExtensions parameter which indicates that the peer does not support AUTH, the packet will be silently ignore, and sctp_process_init() do cleanup all of the transports in the association. When T1-Init timer is expires, OOPS happen while we try to choose a different init transport. The solution is to only clean up the non-active transports, i.e the ones that the peer added. However, that introduces a problem with sctp_connectx(), because we don't mark the proper state for the transports provided by the user. So, we'll simply mark user-provided transports as ACTIVE. That will allow INIT retransmissions to work properly in the sctp_connectx() context and prevent the crash. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 8472b8b..abd51ce 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -599,11 +599,12 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* Check to see if this is a duplicate. */ peer = sctp_assoc_lookup_paddr(asoc, addr); if (peer) { + /* An UNKNOWN state is only set on transports added by + * user in sctp_connectx() call. Such transports should be + * considered CONFIRMED per RFC 4960, Section 5.4. + */ if (peer->state == SCTP_UNKNOWN) { - if (peer_state == SCTP_ACTIVE) - peer->state = SCTP_ACTIVE; - if (peer_state == SCTP_UNCONFIRMED) - peer->state = SCTP_UNCONFIRMED; + peer->state = SCTP_ACTIVE; } return peer; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index fe94f42..b599cbba 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2321,12 +2321,10 @@ clean_up: /* Release the transport structures. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); - list_del_init(pos); - sctp_transport_free(transport); + if (transport->state != SCTP_ACTIVE) + sctp_assoc_rm_peer(asoc, transport); } - asoc->peer.transport_count = 0; - nomem: return 0; } -- cgit v0.10.2 From 27ed9ddfde8d2967076c51815e4ce297c4a18139 Mon Sep 17 00:00:00 2001 From: Benjamin Li Date: Thu, 18 Sep 2008 16:46:11 -0700 Subject: bnx2: Promote vector field in bnx2_irq structure from u16 to unsigned int The bnx2 driver stores/uses the irq value from the pci_dev internally. But when it stores the irq value, it has been performing an integer demotion. Because of the recent changes made to arch/x86/kernel/io_apic.c, the new method in creating the irq value (using build_irq_for_pci_dev()) has exposed this bug on x86 systems. Because of this demotion when calling request_irq() from bnx2_request_irq(), the driver would get a return code of -EINVAL. This is because the kernel could not find the requested irq descriptor. By storing the irq value properly, the kernel can find the correct irq descriptor and the bnx2 driver can operate normally. Signed-off-by: Benjamin Li Signed-off-by: Michael Chan Signed-off-by: David S. Miller diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h index c3c579f..dfacd31 100644 --- a/drivers/net/bnx2.h +++ b/drivers/net/bnx2.h @@ -6597,7 +6597,7 @@ struct flash_spec { struct bnx2_irq { irq_handler_t handler; - u16 vector; + unsigned int vector; u8 requested; char name[16]; }; -- cgit v0.10.2 From f55c21fd9a92a444e55ad1ca4e4732d56661bf2e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 13 Sep 2008 13:10:31 -0700 Subject: forcedeth: call restore mac addr in nv_shutdown path after | commit f735a2a1a4f2a0f5cd823ce323e82675990469e2 | Author: Tobias Diedrich | Date: Sun May 18 15:02:37 2008 +0200 | | [netdrvr] forcedeth: setup wake-on-lan before shutting down | | When hibernating in 'shutdown' mode, after saving the image the suspend hook | is not called again. | However, if the device is in promiscous mode, wake-on-lan will not work. | This adds a shutdown hook to setup wake-on-lan before the final shutdown. | | Signed-off-by: Tobias Diedrich | Signed-off-by: Jeff Garzik my servers with nvidia ck804 and mcp55 will reverse mac address with kexec. it turns out that we need to restore the mac addr in nv_shutdown(). [akpm@linux-foundation.org: fix typo in printk] Signed-off-by: Yinghai Lu Cc: Tobias Diedrich Cc: Ayaz Abdulla Cc: Jeff Garzik Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 0b6ecef..eeb55ed 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -5643,6 +5643,7 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i dev->dev_addr[4] = (np->orig_mac[0] >> 8) & 0xff; dev->dev_addr[5] = (np->orig_mac[0] >> 0) & 0xff; writel(txreg|NVREG_TRANSMITPOLL_MAC_ADDR_REV, base + NvRegTransmitPoll); + printk(KERN_DEBUG "nv_probe: set workaround bit for reversed mac addr\n"); } memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); @@ -5890,14 +5891,12 @@ static void nv_restore_phy(struct net_device *dev) } } -static void __devexit nv_remove(struct pci_dev *pci_dev) +static void nv_restore_mac_addr(struct pci_dev *pci_dev) { struct net_device *dev = pci_get_drvdata(pci_dev); struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); - unregister_netdev(dev); - /* special op: write back the misordered MAC address - otherwise * the next nv_probe would see a wrong address. */ @@ -5905,6 +5904,15 @@ static void __devexit nv_remove(struct pci_dev *pci_dev) writel(np->orig_mac[1], base + NvRegMacAddrB); writel(readl(base + NvRegTransmitPoll) & ~NVREG_TRANSMITPOLL_MAC_ADDR_REV, base + NvRegTransmitPoll); +} + +static void __devexit nv_remove(struct pci_dev *pci_dev) +{ + struct net_device *dev = pci_get_drvdata(pci_dev); + + unregister_netdev(dev); + + nv_restore_mac_addr(pci_dev); /* restore any phy related changes */ nv_restore_phy(dev); @@ -5975,6 +5983,8 @@ static void nv_shutdown(struct pci_dev *pdev) if (netif_running(dev)) nv_close(dev); + nv_restore_mac_addr(pdev); + pci_disable_device(pdev); if (system_state == SYSTEM_POWER_OFF) { if (pci_enable_wake(pdev, PCI_D3cold, np->wolenabled)) -- cgit v0.10.2 From 78566fecbb12a7616ae9a88b2ffbc8062c4a89e3 Mon Sep 17 00:00:00 2001 From: Christopher Li Date: Fri, 5 Sep 2008 14:04:05 -0700 Subject: e1000: prevent corruption of EEPROM/NVM Andrey reports e1000 corruption, and that a patch in vmware's ESX fixed it. The EEPROM corruption is triggered by concurrent access of the EEPROM read/write. Putting a lock around it solve the problem. [akpm@linux-foundation.org: use DEFINE_SPINLOCK to avoid confusing lockdep] Signed-off-by: Christopher Li Reported-by: Andrey Borzenkov Cc: Zach Amsden Cc: Pratap Subrahmanyam Cc: Jeff Kirsher Cc: Jesse Brandeburg Cc: Bruce Allan Cc: PJ Waskiewicz Cc: John Ronciak Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c index 9d6edf3..d04eef53 100644 --- a/drivers/net/e1000/e1000_hw.c +++ b/drivers/net/e1000/e1000_hw.c @@ -144,6 +144,8 @@ static s32 e1000_host_if_read_cookie(struct e1000_hw *hw, u8 *buffer); static u8 e1000_calculate_mng_checksum(char *buffer, u32 length); static s32 e1000_configure_kmrn_for_10_100(struct e1000_hw *hw, u16 duplex); static s32 e1000_configure_kmrn_for_1000(struct e1000_hw *hw); +static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); +static s32 e1000_do_write_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); /* IGP cable length table */ static const @@ -168,6 +170,8 @@ u16 e1000_igp_2_cable_length_table[IGP02E1000_AGC_LENGTH_TABLE_SIZE] = 83, 89, 95, 100, 105, 109, 113, 116, 119, 122, 124, 104, 109, 114, 118, 121, 124}; +static DEFINE_SPINLOCK(e1000_eeprom_lock); + /****************************************************************************** * Set the phy type member in the hw struct. * @@ -4904,6 +4908,15 @@ static s32 e1000_spi_eeprom_ready(struct e1000_hw *hw) *****************************************************************************/ s32 e1000_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { + s32 ret; + spin_lock(&e1000_eeprom_lock); + ret = e1000_do_read_eeprom(hw, offset, words, data); + spin_unlock(&e1000_eeprom_lock); + return ret; +} + +static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) +{ struct e1000_eeprom_info *eeprom = &hw->eeprom; u32 i = 0; @@ -5236,6 +5249,16 @@ s32 e1000_update_eeprom_checksum(struct e1000_hw *hw) *****************************************************************************/ s32 e1000_write_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { + s32 ret; + spin_lock(&e1000_eeprom_lock); + ret = e1000_do_write_eeprom(hw, offset, words, data); + spin_unlock(&e1000_eeprom_lock); + return ret; +} + + +static s32 e1000_do_write_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) +{ struct e1000_eeprom_info *eeprom = &hw->eeprom; s32 status = 0; -- cgit v0.10.2 From e7272403d2f9be3dbb7cc185fcc390e781b1af6b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 8 Aug 2008 00:18:04 +0200 Subject: e100: Use pci_pme_active to clear PME_Status and disable PME# Currently e100 uses pci_enable_wake() to clear pending wake-up events and disable PME# during intitialization, but that function is not suitable for this purpose, because it immediately returns error code if device_may_wakeup() returns false for given device. Make e100 use pci_pme_active(), which carries out exactly the required operations, instead. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jeff Garzik diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 453115a..5cf78d6 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -2738,9 +2738,7 @@ static int __devinit e100_probe(struct pci_dev *pdev, nic->flags |= wol_magic; /* ack any pending wake events, disable PME */ - err = pci_enable_wake(pdev, 0, 0); - if (err) - DPRINTK(PROBE, ERR, "Error clearing wake event\n"); + pci_pme_active(pdev, false); strcpy(netdev->name, "eth%d"); if((err = register_netdev(netdev))) { -- cgit v0.10.2