From 2d3f6ccc4ea5c74d4b4af1b47c56b4cff4bbfcb7 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 4 Jul 2012 20:38:19 +0200 Subject: batman-adv: check incoming packet type for bla If the gateway functionality is used, some broadcast packets (DHCP requests) may be transmitted as unicast packets. As the bridge loop avoidance code now only considers the payload Ethernet destination, it may drop the DHCP request for clients which are claimed by other backbone gateways, because it falsely infers from the broadcast address that the right backbone gateway should havehandled the broadcast. Fix this by checking and delegating the batman-adv packet type used for transmission. Reported-by: Guido Iribarren Signed-off-by: Simon Wunderlich diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 8bf9751..c5863f4 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1351,6 +1351,7 @@ void bla_free(struct bat_priv *bat_priv) * @bat_priv: the bat priv with all the soft interface information * @skb: the frame to be checked * @vid: the VLAN ID of the frame + * @is_bcast: the packet came in a broadcast packet type. * * bla_rx avoidance checks if: * * we have to race for a claim @@ -1361,7 +1362,8 @@ void bla_free(struct bat_priv *bat_priv) * process the skb. * */ -int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid) +int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid, + bool is_bcast) { struct ethhdr *ethhdr; struct claim search_claim, *claim = NULL; @@ -1380,7 +1382,7 @@ int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid) if (unlikely(atomic_read(&bat_priv->bla_num_requests))) /* don't allow broadcasts while requests are in flight */ - if (is_multicast_ether_addr(ethhdr->h_dest)) + if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) goto handled; memcpy(search_claim.addr, ethhdr->h_source, ETH_ALEN); @@ -1406,8 +1408,13 @@ int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid) } /* if it is a broadcast ... */ - if (is_multicast_ether_addr(ethhdr->h_dest)) { - /* ... drop it. the responsible gateway is in charge. */ + if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) { + /* ... drop it. the responsible gateway is in charge. + * + * We need to check is_bcast because with the gateway + * feature, broadcasts (like DHCP requests) may be sent + * using a unicast packet type. + */ goto handled; } else { /* seems the client considers us as its best gateway. diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index e39f93a..dc5227b 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -23,7 +23,8 @@ #define _NET_BATMAN_ADV_BLA_H_ #ifdef CONFIG_BATMAN_ADV_BLA -int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid); +int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid, + bool is_bcast); int bla_tx(struct bat_priv *bat_priv, struct sk_buff *skb, short vid); int bla_is_backbone_gw(struct sk_buff *skb, struct orig_node *orig_node, int hdr_size); @@ -41,7 +42,7 @@ void bla_free(struct bat_priv *bat_priv); #else /* ifdef CONFIG_BATMAN_ADV_BLA */ static inline int bla_rx(struct bat_priv *bat_priv, struct sk_buff *skb, - short vid) + short vid, bool is_bcast) { return 0; } diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 6e2530b..a0ec0e4 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -256,7 +256,11 @@ void interface_rx(struct net_device *soft_iface, struct bat_priv *bat_priv = netdev_priv(soft_iface); struct ethhdr *ethhdr; struct vlan_ethhdr *vhdr; + struct batman_header *batadv_header = (struct batman_header *)skb->data; short vid __maybe_unused = -1; + bool is_bcast; + + is_bcast = (batadv_header->packet_type == BAT_BCAST); /* check if enough space is available for pulling, and pull */ if (!pskb_may_pull(skb, hdr_size)) @@ -302,7 +306,7 @@ void interface_rx(struct net_device *soft_iface, /* Let the bridge loop avoidance check the packet. If will * not handle it, we can safely push it up. */ - if (bla_rx(bat_priv, skb, vid)) + if (bla_rx(bat_priv, skb, vid, is_bcast)) goto out; netif_rx(skb); -- cgit v0.10.2 From 3da947b2693993d5f6138f005d2625e9387c9618 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 2 Jul 2012 01:29:55 +0000 Subject: ieee802154: verify packet size before trying to allocate it Currently when sending data over datagram, the send function will attempt to allocate any size passed on from the userspace. We should make sure that this size is checked and limited. We'll limit it to the MTU of the device, which is checked later anyway. Signed-off-by: Sasha Levin Signed-off-by: David S. Miller diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 6fbb2ad..1670561 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -230,6 +230,12 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, mtu = dev->mtu; pr_debug("name = %s, mtu = %u\n", dev->name, mtu); + if (size > mtu) { + pr_debug("size = %Zu, mtu = %u\n", size, mtu); + err = -EINVAL; + goto out_dev; + } + hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; skb = sock_alloc_send_skb(sk, hlen + tlen + size, @@ -258,12 +264,6 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, if (err < 0) goto out_skb; - if (size > mtu) { - pr_debug("size = %Zu, mtu = %u\n", size, mtu); - err = -EINVAL; - goto out_skb; - } - skb->dev = dev; skb->sk = sk; skb->protocol = htons(ETH_P_IEEE802154); -- cgit v0.10.2 From acfa9e94e2f06f8911a1d2f257880af4ad945eef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Jul 2012 08:36:12 +0000 Subject: net: dont use __netdev_alloc_skb for bounce buffer commit a1c7fff7e1 (net: netdev_alloc_skb() use build_skb()) broke b44 on some 64bit machines. It appears b44 and b43 use __netdev_alloc_skb() instead of alloc_skb() for their bounce buffers. There is no need to add an extra NET_SKB_PAD reservation for bounce buffers : - In TX path, NET_SKB_PAD is useless - In RX path in b44, we force a copy of incoming frames if GFP_DMA allocations were needed. Reported-and-bisected-by: Stefan Bader Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 46b8b7d..d09c6b5 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -656,7 +656,7 @@ static int b44_alloc_rx_skb(struct b44 *bp, int src_idx, u32 dest_idx_unmasked) dma_unmap_single(bp->sdev->dma_dev, mapping, RX_PKT_BUF_SZ, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); - skb = __netdev_alloc_skb(bp->dev, RX_PKT_BUF_SZ, GFP_ATOMIC|GFP_DMA); + skb = alloc_skb(RX_PKT_BUF_SZ, GFP_ATOMIC | GFP_DMA); if (skb == NULL) return -ENOMEM; mapping = dma_map_single(bp->sdev->dma_dev, skb->data, @@ -967,7 +967,7 @@ static netdev_tx_t b44_start_xmit(struct sk_buff *skb, struct net_device *dev) dma_unmap_single(bp->sdev->dma_dev, mapping, len, DMA_TO_DEVICE); - bounce_skb = __netdev_alloc_skb(dev, len, GFP_ATOMIC | GFP_DMA); + bounce_skb = alloc_skb(len, GFP_ATOMIC | GFP_DMA); if (!bounce_skb) goto err_out; diff --git a/drivers/net/wireless/b43legacy/dma.c b/drivers/net/wireless/b43legacy/dma.c index f1f8bd0..c8baf02 100644 --- a/drivers/net/wireless/b43legacy/dma.c +++ b/drivers/net/wireless/b43legacy/dma.c @@ -1072,7 +1072,7 @@ static int dma_tx_fragment(struct b43legacy_dmaring *ring, meta->dmaaddr = map_descbuffer(ring, skb->data, skb->len, 1); /* create a bounce buffer in zone_dma on mapping failure. */ if (b43legacy_dma_mapping_error(ring, meta->dmaaddr, skb->len, 1)) { - bounce_skb = __dev_alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA); + bounce_skb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA); if (!bounce_skb) { ring->current_slot = old_top_slot; ring->used_slots = old_used_slots; -- cgit v0.10.2 From b94e52f62683dc0b00c6d1b58b80929a078c0fd5 Mon Sep 17 00:00:00 2001 From: Cloud Ren Date: Tue, 3 Jul 2012 16:51:48 +0000 Subject: atl1c: fix issue of transmit queue 0 timed out some people report atl1c could cause system hang with following kernel trace info: --------------------------------------- WARNING: at.../net/sched/sch_generic.c:258 dev_watchdog+0x1db/0x1d0() ... NETDEV WATCHDOG: eth0 (atl1c): transmit queue 0 timed out ... --------------------------------------- This is caused by netif_stop_queue calling when cable Link is down. So remove netif_stop_queue, because link_watch will take it over. Signed-off-by: xiong Cc: stable Signed-off-by: Cloud Ren Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 9cc1570..1f78b63 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -261,7 +261,6 @@ static void atl1c_check_link_status(struct atl1c_adapter *adapter) if ((phy_data & BMSR_LSTATUS) == 0) { /* link down */ netif_carrier_off(netdev); - netif_stop_queue(netdev); hw->hibernate = true; if (atl1c_reset_mac(hw) != 0) if (netif_msg_hw(adapter)) -- cgit v0.10.2 From 960fb66e520a405dde39ff883f17ff2669c13d85 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Jul 2012 20:55:21 +0000 Subject: netem: add limitation to reordered packets Fix two netem bugs : 1) When a frame was dropped by tfifo_enqueue(), drop counter was incremented twice. 2) When reordering is triggered, we enqueue a packet without checking queue limit. This can OOM pretty fast when this is repeated enough, since skbs are orphaned, no socket limit can help in this situation. Signed-off-by: Eric Dumazet Cc: Mark Gordon Cc: Andreas Terzis Cc: Yuchung Cheng Cc: Hagen Paul Pfeifer Signed-off-by: David S. Miller diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a2a95aa..c412ad0 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -331,29 +331,22 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche return PSCHED_NS2TICKS(ticks); } -static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) +static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { struct sk_buff_head *list = &sch->q; psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; - struct sk_buff *skb; - - if (likely(skb_queue_len(list) < sch->limit)) { - skb = skb_peek_tail(list); - /* Optimize for add at tail */ - if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send)) - return qdisc_enqueue_tail(nskb, sch); + struct sk_buff *skb = skb_peek_tail(list); - skb_queue_reverse_walk(list, skb) { - if (tnext >= netem_skb_cb(skb)->time_to_send) - break; - } + /* Optimize for add at tail */ + if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send)) + return __skb_queue_tail(list, nskb); - __skb_queue_after(list, skb, nskb); - sch->qstats.backlog += qdisc_pkt_len(nskb); - return NET_XMIT_SUCCESS; + skb_queue_reverse_walk(list, skb) { + if (tnext >= netem_skb_cb(skb)->time_to_send) + break; } - return qdisc_reshape_fail(nskb, sch); + __skb_queue_after(list, skb, nskb); } /* @@ -368,7 +361,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* We don't fill cb now as skb_unshare() may invalidate it */ struct netem_skb_cb *cb; struct sk_buff *skb2; - int ret; int count = 1; /* Random duplication */ @@ -419,6 +411,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); } + if (unlikely(skb_queue_len(&sch->q) >= sch->limit)) + return qdisc_reshape_fail(skb, sch); + + sch->qstats.backlog += qdisc_pkt_len(skb); + cb = netem_skb_cb(skb); if (q->gap == 0 || /* not doing reordering */ q->counter < q->gap - 1 || /* inside last reordering gap */ @@ -450,7 +447,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) cb->time_to_send = now + delay; ++q->counter; - ret = tfifo_enqueue(skb, sch); + tfifo_enqueue(skb, sch); } else { /* * Do re-ordering by putting one out of N packets at the front @@ -460,16 +457,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->counter = 0; __skb_queue_head(&sch->q, skb); - sch->qstats.backlog += qdisc_pkt_len(skb); sch->qstats.requeues++; - ret = NET_XMIT_SUCCESS; - } - - if (ret != NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - return ret; - } } return NET_XMIT_SUCCESS; -- cgit v0.10.2 From d4e41649434cd6db2e69783130cba81886dac97f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 4 Jul 2012 02:00:25 +0000 Subject: ixgbe: DCB and SR-IOV can not co-exist and will cause hangs DCB and SR-IOV cannot currently be enabled at the same time as the queueing schemes are incompatible. If they are both enabled it will result in Tx hangs since only the first Tx queue will be able to transmit any traffic. This simple fix for this is to block us from enabling TCs in ixgbe_setup_tc if SR-IOV is enabled. This change will be reverted once we can support SR-IOV and DCB coexistence. Signed-off-by: Alexander Duyck Acked-by: John Fastabend Tested-by: Phil Schmitt Tested-by: Ross Brattain Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 18ca3bc..e242104 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6647,6 +6647,11 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) return -EINVAL; } + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { + e_err(drv, "Enable failed, SR-IOV enabled\n"); + return -EINVAL; + } + /* Hardware supports up to 8 traffic classes */ if (tc > adapter->dcb_cfg.num_tcs.pg_tcs || (hw->mac.type == ixgbe_mac_82598EB && -- cgit v0.10.2 From b93984c9afacd4fe32b785d52a93660d91202b10 Mon Sep 17 00:00:00 2001 From: David Daney Date: Wed, 4 Jul 2012 12:06:16 +0000 Subject: netdev/phy: Fixup lockdep warnings in mdio-mux.c With lockdep enabled we get: ============================================= [ INFO: possible recursive locking detected ] 3.4.4-Cavium-Octeon+ #313 Not tainted --------------------------------------------- kworker/u:1/36 is trying to acquire lock: (&bus->mdio_lock){+.+...}, at: [] mdio_mux_read+0x38/0xa0 but task is already holding lock: (&bus->mdio_lock){+.+...}, at: [] mdiobus_read+0x44/0x88 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&bus->mdio_lock); lock(&bus->mdio_lock); *** DEADLOCK *** May be due to missing lock nesting notation . . . This is a false positive, since we are indeed using 'nested' locking, we need to use mutex_lock_nested(). Now in theory we can stack multiple MDIO multiplexers, but that would require passing the nesting level (which is difficult to know) to mutex_lock_nested(). Instead we assume the simple case of a single level of nesting. Since these are only warning messages, it isn't so important to solve the general case. Signed-off-by: David Daney Signed-off-by: David S. Miller diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index 39ea067..5c12018 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -46,7 +46,13 @@ static int mdio_mux_read(struct mii_bus *bus, int phy_id, int regnum) struct mdio_mux_parent_bus *pb = cb->parent; int r; - mutex_lock(&pb->mii_bus->mdio_lock); + /* In theory multiple mdio_mux could be stacked, thus creating + * more than a single level of nesting. But in practice, + * SINGLE_DEPTH_NESTING will cover the vast majority of use + * cases. We use it, instead of trying to handle the general + * case. + */ + mutex_lock_nested(&pb->mii_bus->mdio_lock, SINGLE_DEPTH_NESTING); r = pb->switch_fn(pb->current_child, cb->bus_number, pb->switch_data); if (r) goto out; @@ -71,7 +77,7 @@ static int mdio_mux_write(struct mii_bus *bus, int phy_id, int r; - mutex_lock(&pb->mii_bus->mdio_lock); + mutex_lock_nested(&pb->mii_bus->mdio_lock, SINGLE_DEPTH_NESTING); r = pb->switch_fn(pb->current_child, cb->bus_number, pb->switch_data); if (r) goto out; -- cgit v0.10.2 From b761c9b1f4f69eb53fb6147547a1ab25237a93b3 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Wed, 4 Jul 2012 23:28:40 +0000 Subject: cgroup: fix panic in netprio_cgroup we set max_prioidx to the first zero bit index of prioidx_map in function get_prioidx. So when we delete the low index netprio cgroup and adding a new netprio cgroup again,the max_prioidx will be set to the low index. when we set the high index cgroup's net_prio.ifpriomap,the function write_priomap will call update_netdev_tables to alloc memory which size is sizeof(struct netprio_map) + sizeof(u32) * (max_prioidx + 1), so the size of array that map->priomap point to is max_prioidx +1, which is low than what we actually need. fix this by adding check in get_prioidx,only set max_prioidx when max_prioidx low than the new prioidx. Signed-off-by: Gao feng Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 5b8aa2f..aa907ed 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -49,8 +49,9 @@ static int get_prioidx(u32 *prio) return -ENOSPC; } set_bit(prioidx, prioidx_map); + if (atomic_read(&max_prioidx) < prioidx) + atomic_set(&max_prioidx, prioidx); spin_unlock_irqrestore(&prioidx_map_lock, flags); - atomic_set(&max_prioidx, prioidx); *prio = prioidx; return 0; } -- cgit v0.10.2 From 6fecd35d4cd79fc75e8290abb86734c18500d2a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 5 Jul 2012 01:13:33 +0000 Subject: net: qmi_wwan: add ZTE MF60 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding a device with limited QMI support. It does not support normal QMI_WDS commands for connection management. Instead, sending a QMI_CTL SET_INSTANCE_ID command is required to enable the network interface: 01 0f 00 00 00 00 00 00 20 00 04 00 01 01 00 00 A number of QMI_DMS and QMI_NAS commands are also supported for optional device management. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index b01960f..a051ced 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -346,6 +346,15 @@ static const struct driver_info qmi_wwan_force_int1 = { .data = BIT(1), /* interface whitelist bitmap */ }; +static const struct driver_info qmi_wwan_force_int2 = { + .description = "Qualcomm WWAN/QMI device", + .flags = FLAG_WWAN, + .bind = qmi_wwan_bind_shared, + .unbind = qmi_wwan_unbind_shared, + .manage_power = qmi_wwan_manage_power, + .data = BIT(2), /* interface whitelist bitmap */ +}; + static const struct driver_info qmi_wwan_force_int3 = { .description = "Qualcomm WWAN/QMI device", .flags = FLAG_WWAN, @@ -498,6 +507,15 @@ static const struct usb_device_id products[] = { .bInterfaceProtocol = 0xff, .driver_info = (unsigned long)&qmi_wwan_force_int4, }, + { /* ZTE MF60 */ + .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x19d2, + .idProduct = 0x1402, + .bInterfaceClass = 0xff, + .bInterfaceSubClass = 0xff, + .bInterfaceProtocol = 0xff, + .driver_info = (unsigned long)&qmi_wwan_force_int2, + }, { /* Sierra Wireless MC77xx in QMI mode */ .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x1199, -- cgit v0.10.2 From 054581e6c1eb314d54d4747fba545e9802be29da Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 5 Jul 2012 14:21:55 +0000 Subject: cnic: Don't use netdev->base_addr commit c0357e975afdbbedab5c662d19bef865f02adc17 bnx2: stop using net_device.{base_addr, irq}. removed netdev->base_addr so we need to update cnic to get the MMIO base address from pci_resource_start(). Otherwise, mmap of the uio device will fail. Signed-off-by: Michael Chan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index c95e7b5..3c95065 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -1053,12 +1053,13 @@ static int cnic_init_uio(struct cnic_dev *dev) uinfo = &udev->cnic_uinfo; - uinfo->mem[0].addr = dev->netdev->base_addr; + uinfo->mem[0].addr = pci_resource_start(dev->pcidev, 0); uinfo->mem[0].internal_addr = dev->regview; - uinfo->mem[0].size = dev->netdev->mem_end - dev->netdev->mem_start; uinfo->mem[0].memtype = UIO_MEM_PHYS; if (test_bit(CNIC_F_BNX2_CLASS, &dev->flags)) { + uinfo->mem[0].size = MB_GET_CID_ADDR(TX_TSS_CID + + TX_MAX_TSS_RINGS + 1); uinfo->mem[1].addr = (unsigned long) cp->status_blk.gen & PAGE_MASK; if (cp->ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) @@ -1068,6 +1069,8 @@ static int cnic_init_uio(struct cnic_dev *dev) uinfo->name = "bnx2_cnic"; } else if (test_bit(CNIC_F_BNX2X_CLASS, &dev->flags)) { + uinfo->mem[0].size = pci_resource_len(dev->pcidev, 0); + uinfo->mem[1].addr = (unsigned long) cp->bnx2x_def_status_blk & PAGE_MASK; uinfo->mem[1].size = sizeof(*cp->bnx2x_def_status_blk); -- cgit v0.10.2 From a73f89a61f92b364f0b4a3be412b5b70553afc23 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 29 Jun 2012 09:42:28 +0000 Subject: netfilter: ipset: timeout fixing bug broke SET target special timeout value The patch "127f559 netfilter: ipset: fix timeout value overflow bug" broke the SET target when no timeout was specified. Reported-by: Jean-Philippe Menil Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 035960e..c6f7db7 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -16,6 +16,7 @@ #include #include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -310,7 +311,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) info->del_set.flags, 0, UINT_MAX); /* Normalize to fit into jiffies */ - if (add_opt.timeout > UINT_MAX/MSEC_PER_SEC) + if (add_opt.timeout != IPSET_NO_TIMEOUT && + add_opt.timeout > UINT_MAX/MSEC_PER_SEC) add_opt.timeout = UINT_MAX/MSEC_PER_SEC; if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); -- cgit v0.10.2 From 6bd0405bb4196b44f1acb7a58f11382cdaf6f7f0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 5 Jul 2012 15:42:10 +0200 Subject: netfilter: nf_ct_ecache: fix crash with multiple containers, one shutting down Hans reports that he's still hitting: BUG: unable to handle kernel NULL pointer dereference at 000000000000027c IP: [] netlink_has_listeners+0xb/0x60 PGD 0 Oops: 0000 [#3] PREEMPT SMP CPU 0 It happens when adding a number of containers with do: nfct_query(h, NFCT_Q_CREATE, ct); and most likely one namespace shuts down. this problem was supposed to be fixed by: 70e9942 netfilter: nf_conntrack: make event callback registration per-netns Still, it was missing one rcu_access_pointer to check if the callback is set or not. Reported-by: Hans Schillstrom Signed-off-by: Pablo Neira Ayuso diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index a88fb69..e1ce104 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -78,7 +78,7 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *e; - if (net->ct.nf_conntrack_event_cb == NULL) + if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) return; e = nf_ct_ecache_find(ct); -- cgit v0.10.2 From 472dd35ccbd999a6cb2994c318ca16edf65c4359 Mon Sep 17 00:00:00 2001 From: Thomas Huehn Date: Fri, 29 Jun 2012 06:26:27 -0700 Subject: mac80211: correct size the argument to kzalloc in minstrel_ht msp has type struct minstrel_ht_sta_priv not struct minstrel_ht_sta. (This incorporates the fixup originally posted as "mac80211: fix kzalloc memory corruption introduced in minstrel_ht". -- JWL) Reported-by: Fengguang Wu Reported-by: Dan Carpenter Signed-off-by: Thomas Huehn Acked-by: Johannes Berg Signed-off-by: John W. Linville diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 2d1acc6..f9e51ef 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -809,7 +809,7 @@ minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) max_rates = sband->n_bitrates; } - msp = kzalloc(sizeof(struct minstrel_ht_sta), gfp); + msp = kzalloc(sizeof(*msp), gfp); if (!msp) return NULL; -- cgit v0.10.2 From 147f20e316f3949f3f5ffe6c8658e9fe1c6ceb23 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 30 Jun 2012 11:56:47 +0200 Subject: NFC: Prevent NULL deref when getting socket name llcp_sock_getname can be called without a device attached to the nfc_llcp_sock. This would lead to the following BUG: [ 362.341807] BUG: unable to handle kernel NULL pointer dereference at (null) [ 362.341815] IP: [] llcp_sock_getname+0x75/0xc0 [ 362.341818] PGD 31b35067 PUD 30631067 PMD 0 [ 362.341821] Oops: 0000 [#627] PREEMPT SMP DEBUG_PAGEALLOC [ 362.341826] CPU 3 [ 362.341827] Pid: 7816, comm: trinity-child55 Tainted: G D W 3.5.0-rc4-next-20120628-sasha-00005-g9f23eb7 #479 [ 362.341831] RIP: 0010:[] [] llcp_sock_getname+0x75/0xc0 [ 362.341832] RSP: 0018:ffff8800304fde88 EFLAGS: 00010286 [ 362.341834] RAX: 0000000000000000 RBX: ffff880033cb8000 RCX: 0000000000000001 [ 362.341835] RDX: ffff8800304fdec4 RSI: ffff8800304fdec8 RDI: ffff8800304fdeda [ 362.341836] RBP: ffff8800304fdea8 R08: 7ebcebcb772b7ffb R09: 5fbfcb9c35bdfd53 [ 362.341838] R10: 4220020c54326244 R11: 0000000000000246 R12: ffff8800304fdec8 [ 362.341839] R13: ffff8800304fdec4 R14: ffff8800304fdec8 R15: 0000000000000044 [ 362.341841] FS: 00007effa376e700(0000) GS:ffff880035a00000(0000) knlGS:0000000000000000 [ 362.341843] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 362.341844] CR2: 0000000000000000 CR3: 0000000030438000 CR4: 00000000000406e0 [ 362.341851] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 362.341856] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 362.341858] Process trinity-child55 (pid: 7816, threadinfo ffff8800304fc000, task ffff880031270000) [ 362.341858] Stack: [ 362.341862] ffff8800304fdea8 ffff880035156780 0000000000000000 0000000000001000 [ 362.341865] ffff8800304fdf78 ffffffff83183b40 00000000304fdec8 0000006000000000 [ 362.341868] ffff8800304f0027 ffffffff83729649 ffff8800304fdee8 ffff8800304fdf48 [ 362.341869] Call Trace: [ 362.341874] [] sys_getpeername+0xa0/0x110 [ 362.341877] [] ? _raw_spin_unlock_irq+0x59/0x80 [ 362.341882] [] ? do_setitimer+0x23b/0x290 [ 362.341886] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 362.341889] [] system_call_fastpath+0x16/0x1b [ 362.341921] Code: 84 00 00 00 00 00 b8 b3 ff ff ff 48 85 db 74 54 66 41 c7 04 24 27 00 49 8d 7c 24 12 41 c7 45 00 60 00 00 00 48 8b 83 28 05 00 00 <8b> 00 41 89 44 24 04 0f b6 83 41 05 00 00 41 88 44 24 10 0f b6 [ 362.341924] RIP [] llcp_sock_getname+0x75/0xc0 [ 362.341925] RSP [ 362.341926] CR2: 0000000000000000 [ 362.341928] ---[ end trace 6d450e935ee18bf3 ]--- Signed-off-by: Sasha Levin Signed-off-by: John W. Linville diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 17a707d..e06d458 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -292,7 +292,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *addr, pr_debug("%p\n", sk); - if (llcp_sock == NULL) + if (llcp_sock == NULL || llcp_sock->dev == NULL) return -EBADFD; addr->sa_family = AF_NFC; -- cgit v0.10.2 From 10a9109f2705fdc3caa94d768b2559587a9a050c Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Mon, 2 Jul 2012 14:42:03 +0300 Subject: mac80211: destroy assoc_data correctly if assoc fails If association failed due to internal error (e.g. no supported rates IE), we call ieee80211_destroy_assoc_data() with assoc=true, while we actually reject the association. This results in the BSSID not being zeroed out. After passing assoc=false, we no longer have to call sta_info_destroy_addr() explicitly. While on it, move the "associated" message after the assoc_success check. Cc: stable@vger.kernel.org [3.4+] Signed-off-by: Eliad Peller Reviewed-by: Johannes Berg Signed-off-by: John W. Linville diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a4bb856..0db5d34 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2174,15 +2174,13 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, sdata->name, mgmt->sa, status_code); ieee80211_destroy_assoc_data(sdata, false); } else { - printk(KERN_DEBUG "%s: associated\n", sdata->name); - if (!ieee80211_assoc_success(sdata, *bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ - ieee80211_destroy_assoc_data(sdata, true); - sta_info_destroy_addr(sdata, mgmt->bssid); + ieee80211_destroy_assoc_data(sdata, false); cfg80211_put_bss(*bss); return RX_MGMT_CFG80211_ASSOC_TIMEOUT; } + printk(KERN_DEBUG "%s: associated\n", sdata->name); /* * destroy assoc_data afterwards, as otherwise an idle -- cgit v0.10.2 From b3190466b0b6d336eddd10319c64a3ce3029b3ff Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Tue, 3 Jul 2012 15:53:13 -0700 Subject: mwifiex: fix Coverity SCAN CID 709078: Resource leak (RESOURCE_LEAK) > *. CID 709078: Resource leak (RESOURCE_LEAK) > - drivers/net/wireless/mwifiex/cfg80211.c, line: 935 > Assigning: "bss_cfg" = storage returned from "kzalloc(132UL, 208U)" > - but was not free > drivers/net/wireless/mwifiex/cfg80211.c:935 Signed-off-by: Bing Zhao Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index ce61b6f..5c7fd18 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -958,6 +958,7 @@ static int mwifiex_cfg80211_start_ap(struct wiphy *wiphy, case NL80211_HIDDEN_SSID_ZERO_CONTENTS: /* firmware doesn't support this type of hidden SSID */ default: + kfree(bss_cfg); return -EINVAL; } -- cgit v0.10.2 From efd821182cec8c92babef6e00a95066d3252fda4 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 4 Jul 2012 13:10:02 +0200 Subject: rt2x00usb: fix indexes ordering on RX queue kick On rt2x00_dmastart() we increase index specified by Q_INDEX and on rt2x00_dmadone() we increase index specified by Q_INDEX_DONE. So entries between Q_INDEX_DONE and Q_INDEX are those we currently process in the hardware. Entries between Q_INDEX and Q_INDEX_DONE are those we can submit to the hardware. According to that fix rt2x00usb_kick_queue(), as we need to submit RX entries that are not processed by the hardware. It worked before only for empty queue, otherwise was broken. Note that for TX queues indexes ordering are ok. We need to kick entries that have filled skb, but was not submitted to the hardware, i.e. started from Q_INDEX_DONE and have ENTRY_DATA_PENDING bit set. From practical standpoint this fixes RX queue stall, usually reproducible in AP mode, like for example reported here: https://bugzilla.redhat.com/show_bug.cgi?id=828824 Reported-and-tested-by: Franco Miceli Reported-and-tested-by: Tom Horsley Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c index d357d1e..74ecc33 100644 --- a/drivers/net/wireless/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/rt2x00/rt2x00usb.c @@ -436,8 +436,8 @@ void rt2x00usb_kick_queue(struct data_queue *queue) case QID_RX: if (!rt2x00queue_full(queue)) rt2x00queue_for_each_entry(queue, - Q_INDEX_DONE, Q_INDEX, + Q_INDEX_DONE, NULL, rt2x00usb_kick_rx_entry); break; -- cgit v0.10.2 From c2ca7d92ed4bbd779516beb6eb226e19f7f7ab0f Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 4 Jul 2012 13:20:20 +0200 Subject: iwlegacy: always monitor for stuck queues This is iwlegacy version of: commit 342bbf3fee2fa9a18147e74b2e3c4229a4564912 Author: Johannes Berg Date: Sun Mar 4 08:50:46 2012 -0800 iwlwifi: always monitor for stuck queues If we only monitor while associated, the following can happen: - we're associated, and the queue stuck check runs, setting the queue "touch" time to X - we disassociate, stopping the monitoring, which leaves the time set to X - almost 2s later, we associate, and enqueue a frame - before the frame is transmitted, we monitor for stuck queues, and find the time set to X, although it is now later than X + 2000ms, so we decide that the queue is stuck and erroneously restart the device Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c index cbf2dc1..5d4807c 100644 --- a/drivers/net/wireless/iwlegacy/common.c +++ b/drivers/net/wireless/iwlegacy/common.c @@ -4767,14 +4767,12 @@ il_bg_watchdog(unsigned long data) return; /* monitor and check for other stuck queues */ - if (il_is_any_associated(il)) { - for (cnt = 0; cnt < il->hw_params.max_txq_num; cnt++) { - /* skip as we already checked the command queue */ - if (cnt == il->cmd_queue) - continue; - if (il_check_stuck_queue(il, cnt)) - return; - } + for (cnt = 0; cnt < il->hw_params.max_txq_num; cnt++) { + /* skip as we already checked the command queue */ + if (cnt == il->cmd_queue) + continue; + if (il_check_stuck_queue(il, cnt)) + return; } mod_timer(&il->watchdog, -- cgit v0.10.2 From b48d96652626b315229b1b82c6270eead6a77a6d Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 4 Jul 2012 13:59:08 +0200 Subject: iwlegacy: don't mess up the SCD when removing a key When we remove a key, we put a key index which was supposed to tell the fw that we are actually removing the key. But instead the fw took that index as a valid index and messed up the SRAM of the device. This memory corruption on the device mangled the data of the SCD. The impact on the user is that SCD queue 2 got stuck after having removed keys. Reported-by: Paul Bolle Cc: stable@vger.kernel.org Signed-off-by: Emmanuel Grumbach Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index 509301a..ff5d689 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -3405,7 +3405,7 @@ il4965_remove_dynamic_key(struct il_priv *il, return 0; } - if (il->stations[sta_id].sta.key.key_offset == WEP_INVALID_OFFSET) { + if (il->stations[sta_id].sta.key.key_flags & STA_KEY_FLG_INVALID) { IL_WARN("Removing wrong key %d 0x%x\n", keyconf->keyidx, key_flags); spin_unlock_irqrestore(&il->sta_lock, flags); @@ -3420,7 +3420,7 @@ il4965_remove_dynamic_key(struct il_priv *il, memset(&il->stations[sta_id].sta.key, 0, sizeof(struct il4965_keyinfo)); il->stations[sta_id].sta.key.key_flags = STA_KEY_FLG_NO_ENC | STA_KEY_FLG_INVALID; - il->stations[sta_id].sta.key.key_offset = WEP_INVALID_OFFSET; + il->stations[sta_id].sta.key.key_offset = keyconf->hw_key_idx; il->stations[sta_id].sta.sta.modify_mask = STA_MODIFY_KEY_MASK; il->stations[sta_id].sta.mode = STA_CONTROL_MODIFY_MSK; -- cgit v0.10.2 From 8e83989106562326bfd6aaf92174fe138efd026b Mon Sep 17 00:00:00 2001 From: Deepak Sikri Date: Sun, 8 Jul 2012 21:14:45 +0000 Subject: stmmac: Fix for nfs hang on multiple reboot It was observed that during multiple reboots nfs hangs. The status of receive descriptors shows that all the descriptors were in control of CPU, and none were assigned to DMA. Also the DMA status register confirmed that the Rx buffer is unavailable. This patch adds the fix for the same by adding the memory barriers to ascertain that the all instructions before enabling the Rx or Tx DMA are completed which involves the proper setting of the ownership bit in DMA descriptors. Signed-off-by: Deepak Sikri Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 51b3b68..ea3003e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1212,6 +1212,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion); wmb(); priv->hw->desc->set_tx_owner(desc); + wmb(); } /* Interrupt on completition only for the latest segment */ @@ -1227,6 +1228,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* To avoid raise condition */ priv->hw->desc->set_tx_owner(first); + wmb(); priv->cur_tx++; @@ -1290,6 +1292,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv) } wmb(); priv->hw->desc->set_rx_owner(p + entry); + wmb(); } } -- cgit v0.10.2 From 684901a6df1fb91fc9a2bdb89ffbebb241428d78 Mon Sep 17 00:00:00 2001 From: Deepak Sikri Date: Sun, 8 Jul 2012 21:14:46 +0000 Subject: stmmac: Fix for higher mtu size handling For the higher mtu sizes requiring the buffer size greater than 8192, the buffers are sent or received using multiple dma descriptors/ same descriptor with option of multi buffer handling. It was observed during tests that the driver was missing on data packets during the normal ping operations if the data buffers being used catered to jumbo frame handling. The memory barrriers are added in between preparation of dma descriptors in the jumbo frame handling path to ensure all instructions before enabling the dma are complete. Signed-off-by: Deepak Sikri Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index fb8377d..4b785e1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -51,7 +51,7 @@ static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) desc->des3 = desc->des2 + BUF_SIZE_4KiB; priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum); - + wmb(); entry = (++priv->cur_tx) % txsize; desc = priv->dma_tx + entry; @@ -59,6 +59,7 @@ static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum) len, DMA_TO_DEVICE); desc->des3 = desc->des2 + BUF_SIZE_4KiB; priv->hw->desc->prepare_tx_desc(desc, 0, len, csum); + wmb(); priv->hw->desc->set_tx_owner(desc); priv->tx_skbuff[entry] = NULL; } else { -- cgit v0.10.2 From a64d49c3dd504b685f9742a2f3dcb11fb8e4345f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jul 2012 10:51:45 +0000 Subject: bonding: Manage /proc/net/bonding/ entries from the netdev events It was recently reported that moving a bonding device between network namespaces causes warnings from /proc. It turns out after the move we were trying to add and to remove the /proc/net/bonding entries from the wrong network namespace. Move the bonding /proc registration code into the NETDEV_REGISTER and NETDEV_UNREGISTER events where the proc registration and unregistration will always happen at the right time. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b9c2ae6..2ee7699 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3227,6 +3227,12 @@ static int bond_master_netdev_event(unsigned long event, switch (event) { case NETDEV_CHANGENAME: return bond_event_changename(event_bond); + case NETDEV_UNREGISTER: + bond_remove_proc_entry(event_bond); + break; + case NETDEV_REGISTER: + bond_create_proc_entry(event_bond); + break; default: break; } @@ -4411,8 +4417,6 @@ static void bond_uninit(struct net_device *bond_dev) bond_work_cancel_all(bond); - bond_remove_proc_entry(bond); - bond_debug_unregister(bond); __hw_addr_flush(&bond->mc_list); @@ -4814,7 +4818,6 @@ static int bond_init(struct net_device *bond_dev) bond_set_lockdep_class(bond_dev); - bond_create_proc_entry(bond); list_add_tail(&bond->bond_list, &bn->dev_list); bond_prepare_sysfs_group(bond); -- cgit v0.10.2 From 96ca7ffe748bf91f851e6aa4479aa11c8b1122ba Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jul 2012 10:52:43 +0000 Subject: bonding: debugfs and network namespaces are incompatible The bonding debugfs support has been broken in the presence of network namespaces since it has been added. The debugfs support does not handle multiple bonding devices with the same name in different network namespaces. I haven't had any bug reports, and I'm not interested in getting any. Disable the debugfs support when network namespaces are enabled. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c index 3680aa2..2cf084e 100644 --- a/drivers/net/bonding/bond_debugfs.c +++ b/drivers/net/bonding/bond_debugfs.c @@ -6,7 +6,7 @@ #include "bonding.h" #include "bond_alb.h" -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_NET_NS) #include #include -- cgit v0.10.2 From 91c68ce2b26319248a32d7baa1226f819d283758 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Jul 2012 21:45:10 +0000 Subject: net: cgroup: fix out of bounds accesses dev->priomap is allocated by extend_netdev_table() called from update_netdev_tables(). And this is only called if write_priomap() is called. But if write_priomap() is not called, it seems we can have out of bounds accesses in cgrp_destroy(), read_priomap() & skb_update_prio() With help from Gao Feng Signed-off-by: Eric Dumazet Cc: Neil Horman Cc: Gao feng Acked-by: Gao feng Signed-off-by: David S. Miller diff --git a/net/core/dev.c b/net/core/dev.c index 84f01ba..0f28a9e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2444,8 +2444,12 @@ static void skb_update_prio(struct sk_buff *skb) { struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); - if ((!skb->priority) && (skb->sk) && map) - skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx]; + if (!skb->priority && skb->sk && map) { + unsigned int prioidx = skb->sk->sk_cgrp_prioidx; + + if (prioidx < map->priomap_len) + skb->priority = map->priomap[prioidx]; + } } #else #define skb_update_prio(skb) diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index aa907ed..3e953ea 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -142,7 +142,7 @@ static void cgrp_destroy(struct cgroup *cgrp) rtnl_lock(); for_each_netdev(&init_net, dev) { map = rtnl_dereference(dev->priomap); - if (map) + if (map && cs->prioidx < map->priomap_len) map->priomap[cs->prioidx] = 0; } rtnl_unlock(); @@ -166,7 +166,7 @@ static int read_priomap(struct cgroup *cont, struct cftype *cft, rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { map = rcu_dereference(dev->priomap); - priority = map ? map->priomap[prioidx] : 0; + priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0; cb->fill(cb, dev->name, priority); } rcu_read_unlock(); -- cgit v0.10.2 From 1b9faf5e66bae8428a4ccdc1447d5399d1014581 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 8 Jul 2012 01:37:38 +0000 Subject: drivers/isdn/mISDN/stack.c: remove invalid reference to list iterator variable If list_for_each_entry, etc complete a traversal of the list, the iterator variable ends up pointing to an address at an offset from the list head, and not a meaningful structure. Thus this value should not be used after the end of the iterator. The dereferences are just deleted from the debugging statement. This problem was found using Coccinelle (http://coccinelle.lip6.fr/). Signed-off-by: Julia Lawall Signed-off-by: David S. Miller diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index 1a0ae44..5f21f62 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -135,8 +135,8 @@ send_layer2(struct mISDNstack *st, struct sk_buff *skb) skb = NULL; else if (*debug & DEBUG_SEND_ERR) printk(KERN_DEBUG - "%s ch%d mgr prim(%x) addr(%x) err %d\n", - __func__, ch->nr, hh->prim, ch->addr, ret); + "%s mgr prim(%x) err %d\n", + __func__, hh->prim, ret); } out: mutex_unlock(&st->lmutex); -- cgit v0.10.2 From cae296c42c94a27e0abdea96eb762752d1ba4908 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 8 Jul 2012 01:37:39 +0000 Subject: net/rxrpc/ar-peer.c: remove invalid reference to list iterator variable If list_for_each_entry, etc complete a traversal of the list, the iterator variable ends up pointing to an address at an offset from the list head, and not a meaningful structure. Thus this value should not be used after the end of the iterator. This seems to be a copy-paste bug from a previous debugging message, and so the meaningless value is just deleted. This problem was found using Coccinelle (http://coccinelle.lip6.fr/). Signed-off-by: Julia Lawall Signed-off-by: David S. Miller diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index 2754f09..bebaa43 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -229,7 +229,7 @@ found_UDP_peer: return peer; new_UDP_peer: - _net("Rx UDP DGRAM from NEW peer %d", peer->debug_id); + _net("Rx UDP DGRAM from NEW peer"); read_unlock_bh(&rxrpc_peer_lock); _leave(" = -EBUSY [new]"); return ERR_PTR(-EBUSY); -- cgit v0.10.2 From 022f09784b85396b4ceba954ce28e50de4882281 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 8 Jul 2012 01:37:43 +0000 Subject: drivers/net/ethernet/broadcom/cnic.c: remove invalid reference to list iterator variable If list_for_each_entry, etc complete a traversal of the list, the iterator variable ends up pointing to an address at an offset from the list head, and not a meaningful structure. Thus this value should not be used after the end of the iterator. There does not seem to be a meaningful value to provide to netdev_warn. Replace with pr_warn, since pr_err is used elsewhere. This problem was found using Coccinelle (http://coccinelle.lip6.fr/). Signed-off-by: Julia Lawall Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index 3c95065..2c89d17 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -534,7 +534,8 @@ int cnic_unregister_driver(int ulp_type) } if (atomic_read(&ulp_ops->ref_count) != 0) - netdev_warn(dev->netdev, "Failed waiting for ref count to go to zero\n"); + pr_warn("%s: Failed waiting for ref count to go to zero\n", + __func__); return 0; out_unlock: -- cgit v0.10.2 From 313b037cf054ec908de92fb4c085403ffd7420d4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Jul 2012 11:45:13 +0000 Subject: gianfar: fix potential sk_wmem_alloc imbalance commit db83d136d7f753 (gianfar: Fix missing sock reference when processing TX time stamps) added a potential sk_wmem_alloc imbalance If the new skb has a different truesize than old one, we can get a negative sk_wmem_alloc once new skb is orphaned at TX completion. Now we no longer early orphan skbs in dev_hard_start_xmit(), this probably can lead to fatal bugs. Signed-off-by: Eric Dumazet Tested-by: Paul Gortmaker Cc: Manfred Rudigier Cc: Claudiu Manoil Cc: Jiajun Wu Cc: Andy Fleming Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index f2db8fc..ab1d80f 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2063,10 +2063,9 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } - /* Steal sock reference for processing TX time stamps */ - swap(skb_new->sk, skb->sk); - swap(skb_new->destructor, skb->destructor); - kfree_skb(skb); + if (skb->sk) + skb_set_owner_w(skb_new, skb->sk); + consume_skb(skb); skb = skb_new; } -- cgit v0.10.2 From efc73f4bbc238d4f579fb612c04c8e1dd8a82979 Mon Sep 17 00:00:00 2001 From: Amir Hanania Date: Mon, 9 Jul 2012 20:47:19 +0000 Subject: net: Fix memory leak - vlan_info struct In driver reload test there is a memory leak. The structure vlan_info was not freed when the driver was removed. It was not released since the nr_vids var is one after last vlan was removed. The nr_vids is one, since vlan zero is added to the interface when the interface is being set, but the vlan zero is not deleted at unregister. Fix - delete vlan zero when we unregister the device. Signed-off-by: Amir Hanania Acked-by: John Fastabend Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 6089f0c..9096bcb 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -403,6 +403,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, break; case NETDEV_DOWN: + if (dev->features & NETIF_F_HW_VLAN_FILTER) + vlan_vid_del(dev, 0); + /* Put all VLANs for this dev in the down state too. */ for (i = 0; i < VLAN_N_VID; i++) { vlandev = vlan_group_get_device(grp, i); -- cgit v0.10.2 From b28ba72665356438e3a6e3be365c3c3071496840 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Jul 2012 10:03:41 +0000 Subject: IPoIB: fix skb truesize underestimatiom Or Gerlitz reported triggering of WARN_ON_ONCE(delta < len); in skb_try_coalesce() This warning tracks drivers that incorrectly set skb->truesize IPoIB indeed allocates a full page to store a fragment, but only accounts in skb->truesize the used part of the page (frame length) This patch fixes skb truesize underestimation, and also fixes a performance issue, because RX skbs have not enough tailroom to allow IP and TCP stacks to pull their header in skb linear part without an expensive call to pskb_expand_head() Signed-off-by: Eric Dumazet Reported-by: Or Gerlitz Cc: Erez Shitrit Cc: Shlomo Pongartz Cc: Roland Dreier Signed-off-by: David S. Miller diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 5c1bc99..f10221f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -123,7 +123,7 @@ static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv, skb_frag_size_set(frag, size); skb->data_len += size; - skb->truesize += size; + skb->truesize += PAGE_SIZE; } else skb_put(skb, length); @@ -156,14 +156,18 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id) struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; int buf_size; + int tailroom; u64 *mapping; - if (ipoib_ud_need_sg(priv->max_ib_mtu)) + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { buf_size = IPOIB_UD_HEAD_SIZE; - else + tailroom = 128; /* reserve some tailroom for IP/TCP headers */ + } else { buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); + tailroom = 0; + } - skb = dev_alloc_skb(buf_size + 4); + skb = dev_alloc_skb(buf_size + tailroom + 4); if (unlikely(!skb)) return NULL; -- cgit v0.10.2 From c1f5163de417dab01fa9daaf09a74bbb19303f3c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 10 Jul 2012 10:04:40 +0000 Subject: bnx2: Fix bug in bnx2_free_tx_skbs(). In rare cases, bnx2x_free_tx_skbs() can unmap the wrong DMA address when it gets to the last entry of the tx ring. We were not using the proper macro to skip the last entry when advancing the tx index. Reported-by: Zongyun Lai Reviewed-by: Jeffrey Huang Signed-off-by: Michael Chan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index ac7b744..1fa4927 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -5372,7 +5372,7 @@ bnx2_free_tx_skbs(struct bnx2 *bp) int k, last; if (skb == NULL) { - j++; + j = NEXT_TX_BD(j); continue; } @@ -5384,8 +5384,8 @@ bnx2_free_tx_skbs(struct bnx2 *bp) tx_buf->skb = NULL; last = tx_buf->nr_frags; - j++; - for (k = 0; k < last; k++, j++) { + j = NEXT_TX_BD(j); + for (k = 0; k < last; k++, j = NEXT_TX_BD(j)) { tx_buf = &txr->tx_buf_ring[TX_RING_IDX(j)]; dma_unmap_page(&bp->pdev->dev, dma_unmap_addr(tx_buf, mapping), -- cgit v0.10.2 From 7ac2908e4b2edaec60e9090ddb4d9ceb76c05e7d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 12 Jul 2012 03:39:11 +0000 Subject: sch_sfb: Fix missing NULL check Resolves-bug: https://bugzilla.kernel.org/show_bug.cgi?id=44461 Signed-off-by: Alan Cox Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 74305c8..30ea467 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -570,6 +570,8 @@ static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb) sch->qstats.backlog = q->qdisc->qstats.backlog; opts = nla_nest_start(skb, TCA_OPTIONS); + if (opts == NULL) + goto nla_put_failure; if (nla_put(skb, TCA_SFB_PARMS, sizeof(opt), &opt)) goto nla_put_failure; return nla_nest_end(skb, opts); -- cgit v0.10.2 From d0efa8f23a644f7cb7d1f8e78dd9a223efa412a3 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Thu, 12 Jul 2012 08:56:56 +0000 Subject: e1000e: Correct link check logic for 82571 serdes SYNCH bit and IV bit of RXCW register are sticky. Before examining these bits, RXCW should be read twice to filter out one-time false events and have correct values for these bits. Incorrect values of these bits in link check logic can cause weird link stability issues if auto-negotiation fails. CC: stable [2.6.38+] Reported-by: Dean Nelson Signed-off-by: Tushar Dave Reviewed-by: Bruce Allan Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 36db4df..1f063dc 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -1572,6 +1572,9 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw) ctrl = er32(CTRL); status = er32(STATUS); rxcw = er32(RXCW); + /* SYNCH bit and IV bit are sticky */ + udelay(10); + rxcw = er32(RXCW); if ((rxcw & E1000_RXCW_SYNCH) && !(rxcw & E1000_RXCW_IV)) { -- cgit v0.10.2 From a52359b56c29f55aaadf1dab80a0e1043b982676 Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Sat, 14 Jul 2012 04:23:58 +0000 Subject: e1000e: fix test for PHY being accessible on 82577/8/9 and I217 Occasionally, the PHY can be initially inaccessible when the first read of a PHY register, e.g. PHY_ID1, happens (signified by the returned value 0xFFFF) but subsequent accesses of the PHY work as expected. Add a retry counter similar to how it is done in the generic e1000_get_phy_id(). Also, when the PHY is completely inaccessible (i.e. when subsequent reads of the PHY_IDx registers returns all F's) and the MDIO access mode must be set to slow before attempting to read the PHY ID again, the functions that do these latter two actions expect the SW/FW/HW semaphore is not already set so the semaphore must be released before and re-acquired after calling them otherwise there is an unnecessarily inordinate amount of delay during device initialization. Reported-by: Heikki Krogerus Signed-off-by: Bruce Allan Tested-by: Heikki Krogerus Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 238ab2f..e3a7b07 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -325,24 +325,46 @@ static inline void __ew32flash(struct e1000_hw *hw, unsigned long reg, u32 val) **/ static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw) { - u16 phy_reg; - u32 phy_id; + u16 phy_reg = 0; + u32 phy_id = 0; + s32 ret_val; + u16 retry_count; + + for (retry_count = 0; retry_count < 2; retry_count++) { + ret_val = e1e_rphy_locked(hw, PHY_ID1, &phy_reg); + if (ret_val || (phy_reg == 0xFFFF)) + continue; + phy_id = (u32)(phy_reg << 16); - e1e_rphy_locked(hw, PHY_ID1, &phy_reg); - phy_id = (u32)(phy_reg << 16); - e1e_rphy_locked(hw, PHY_ID2, &phy_reg); - phy_id |= (u32)(phy_reg & PHY_REVISION_MASK); + ret_val = e1e_rphy_locked(hw, PHY_ID2, &phy_reg); + if (ret_val || (phy_reg == 0xFFFF)) { + phy_id = 0; + continue; + } + phy_id |= (u32)(phy_reg & PHY_REVISION_MASK); + break; + } if (hw->phy.id) { if (hw->phy.id == phy_id) return true; - } else { - if ((phy_id != 0) && (phy_id != PHY_REVISION_MASK)) - hw->phy.id = phy_id; + } else if (phy_id) { + hw->phy.id = phy_id; + hw->phy.revision = (u32)(phy_reg & ~PHY_REVISION_MASK); return true; } - return false; + /* + * In case the PHY needs to be in mdio slow mode, + * set slow mode and try to get the PHY id again. + */ + hw->phy.ops.release(hw); + ret_val = e1000_set_mdio_slow_mode_hv(hw); + if (!ret_val) + ret_val = e1000e_get_phy_id(hw); + hw->phy.ops.acquire(hw); + + return !ret_val; } /** -- cgit v0.10.2 From 310e158cc3b7a6adf41e778d52be746c4dc88561 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Jul 2012 13:15:52 +0200 Subject: net: respect GFP_DMA in __netdev_alloc_skb() Few drivers use GFP_DMA allocations, and netdev_alloc_frag() doesn't allocate pages in DMA zone. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 46a3d23..d124306 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -353,7 +353,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int fragsz = SKB_DATA_ALIGN(length + NET_SKB_PAD) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - if (fragsz <= PAGE_SIZE && !(gfp_mask & __GFP_WAIT)) { + if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) { void *data = netdev_alloc_frag(fragsz); if (likely(data)) { -- cgit v0.10.2 From 2eebc1e188e9e45886ee00662519849339884d6d Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 16 Jul 2012 09:13:51 +0000 Subject: sctp: Fix list corruption resulting from freeing an association on a list A few days ago Dave Jones reported this oops: [22766.294255] general protection fault: 0000 [#1] PREEMPT SMP [22766.295376] CPU 0 [22766.295384] Modules linked in: [22766.387137] ffffffffa169f292 6b6b6b6b6b6b6b6b ffff880147c03a90 ffff880147c03a74 [22766.387135] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 00000000000 [22766.387136] Process trinity-watchdo (pid: 10896, threadinfo ffff88013e7d2000, [22766.387137] Stack: [22766.387140] ffff880147c03a10 [22766.387140] ffffffffa169f2b6 [22766.387140] ffff88013ed95728 [22766.387143] 0000000000000002 [22766.387143] 0000000000000000 [22766.387143] ffff880003fad062 [22766.387144] ffff88013c120000 [22766.387144] [22766.387145] Call Trace: [22766.387145] [22766.387150] [] ? __sctp_lookup_association+0x62/0xd0 [sctp] [22766.387154] [] __sctp_lookup_association+0x86/0xd0 [sctp] [22766.387157] [] sctp_rcv+0x207/0xbb0 [sctp] [22766.387161] [] ? trace_hardirqs_off_caller+0x28/0xd0 [22766.387163] [] ? nf_hook_slow+0x133/0x210 [22766.387166] [] ? ip_local_deliver_finish+0x4c/0x4c0 [22766.387168] [] ip_local_deliver_finish+0x18d/0x4c0 [22766.387169] [] ? ip_local_deliver_finish+0x4c/0x4c0 [22766.387171] [] ip_local_deliver+0x47/0x80 [22766.387172] [] ip_rcv_finish+0x150/0x680 [22766.387174] [] ip_rcv+0x214/0x320 [22766.387176] [] __netif_receive_skb+0x7b7/0x910 [22766.387178] [] ? __netif_receive_skb+0x11c/0x910 [22766.387180] [] ? put_lock_stats.isra.25+0xe/0x40 [22766.387182] [] netif_receive_skb+0x23/0x1f0 [22766.387183] [] ? dev_gro_receive+0x139/0x440 [22766.387185] [] napi_skb_finish+0x70/0xa0 [22766.387187] [] napi_gro_receive+0xf5/0x130 [22766.387218] [] e1000_receive_skb+0x59/0x70 [e1000e] [22766.387242] [] e1000_clean_rx_irq+0x28b/0x460 [e1000e] [22766.387266] [] e1000e_poll+0x78/0x430 [e1000e] [22766.387268] [] net_rx_action+0x1aa/0x3d0 [22766.387270] [] ? account_system_vtime+0x10f/0x130 [22766.387273] [] __do_softirq+0xe0/0x420 [22766.387275] [] call_softirq+0x1c/0x30 [22766.387278] [] do_softirq+0xd5/0x110 [22766.387279] [] irq_exit+0xd5/0xe0 [22766.387281] [] do_IRQ+0x63/0xd0 [22766.387283] [] common_interrupt+0x6f/0x6f [22766.387283] [22766.387284] [22766.387285] [] ? retint_swapgs+0x13/0x1b [22766.387285] Code: c0 90 5d c3 66 0f 1f 44 00 00 4c 89 c8 5d c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 5d e8 4c 89 65 f0 4c 89 6d f8 66 66 66 66 90 <0f> b7 87 98 00 00 00 48 89 fb 49 89 f5 66 c1 c0 08 66 39 46 02 [22766.387307] [22766.387307] RIP [22766.387311] [] sctp_assoc_is_match+0x19/0x90 [sctp] [22766.387311] RSP [22766.387142] ffffffffa16ab120 [22766.599537] ---[ end trace 3f6dae82e37b17f5 ]--- [22766.601221] Kernel panic - not syncing: Fatal exception in interrupt It appears from his analysis and some staring at the code that this is likely occuring because an association is getting freed while still on the sctp_assoc_hashtable. As a result, we get a gpf when traversing the hashtable while a freed node corrupts part of the list. Nominally I would think that an mibalanced refcount was responsible for this, but I can't seem to find any obvious imbalance. What I did note however was that the two places where we create an association using sctp_primitive_ASSOCIATE (__sctp_connect and sctp_sendmsg), have failure paths which free a newly created association after calling sctp_primitive_ASSOCIATE. sctp_primitive_ASSOCIATE brings us into the sctp_sf_do_prm_asoc path, which issues a SCTP_CMD_NEW_ASOC side effect, which in turn adds a new association to the aforementioned hash table. the sctp command interpreter that process side effects has not way to unwind previously processed commands, so freeing the association from the __sctp_connect or sctp_sendmsg error path would lead to a freed association remaining on this hash table. I've fixed this but modifying sctp_[un]hash_established to use hlist_del_init, which allows us to proerly use hlist_unhashed to check if the node is on a hashlist safely during a delete. That in turn alows us to safely call sctp_unhash_established in the __sctp_connect and sctp_sendmsg error paths before freeing them, regardles of what the associations state is on the hash list. I noted, while I was doing this, that the __sctp_unhash_endpoint was using hlist_unhsashed in a simmilar fashion, but never nullified any removed nodes pointers to make that function work properly, so I fixed that up in a simmilar fashion. I attempted to test this using a virtual guest running the SCTP_RR test from netperf in a loop while running the trinity fuzzer, both in a loop. I wasn't able to recreate the problem prior to this fix, nor was I able to trigger the failure after (neither of which I suppose is suprising). Given the trace above however, I think its likely that this is what we hit. Signed-off-by: Neil Horman Reported-by: davej@redhat.com CC: davej@redhat.com CC: "David S. Miller" CC: Vlad Yasevich CC: Sridhar Samudrala CC: linux-sctp@vger.kernel.org Signed-off-by: David S. Miller diff --git a/net/sctp/input.c b/net/sctp/input.c index 80564fe..8b9b679 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -736,15 +736,12 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) epb = &ep->base; - if (hlist_unhashed(&epb->node)) - return; - epb->hashent = sctp_ep_hashfn(epb->bind_addr.port); head = &sctp_ep_hashtable[epb->hashent]; sctp_write_lock(&head->lock); - __hlist_del(&epb->node); + hlist_del_init(&epb->node); sctp_write_unlock(&head->lock); } @@ -825,7 +822,7 @@ static void __sctp_unhash_established(struct sctp_association *asoc) head = &sctp_assoc_hashtable[epb->hashent]; sctp_write_lock(&head->lock); - __hlist_del(&epb->node); + hlist_del_init(&epb->node); sctp_write_unlock(&head->lock); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b3b8a8d..31c7bfc 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1231,8 +1231,14 @@ out_free: SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p" " kaddrs: %p err: %d\n", asoc, kaddrs, err); - if (asoc) + if (asoc) { + /* sctp_primitive_ASSOCIATE may have added this association + * To the hash table, try to unhash it, just in case, its a noop + * if it wasn't hashed so we're safe + */ + sctp_unhash_established(asoc); sctp_association_free(asoc); + } return err; } @@ -1942,8 +1948,10 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_unlock; out_free: - if (new_asoc) + if (new_asoc) { + sctp_unhash_established(asoc); sctp_association_free(asoc); + } out_unlock: sctp_release_sock(sk); -- cgit v0.10.2 From 936597631dd310e220544dc5c6075d924efd39b2 Mon Sep 17 00:00:00 2001 From: Narendra K Date: Mon, 16 Jul 2012 15:24:41 +0000 Subject: ixgbevf: Prevent RX/TX statistics getting reset to zero The commit 4197aa7bb81877ebb06e4f2cc1b5fea2da23a7bd implements 64 bit per ring statistics. But the driver resets the 'total_bytes' and 'total_packets' from RX and TX rings in the RX and TX interrupt handlers to zero. This results in statistics being lost and user space reporting RX and TX statistics as zero. This patch addresses the issue by preventing the resetting of RX and TX ring statistics to zero. Signed-off-by: Narendra K Tested-by: Sibai Li Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index f69ec42..8b304a4 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -969,8 +969,6 @@ static irqreturn_t ixgbevf_msix_clean_tx(int irq, void *data) r_idx = find_first_bit(q_vector->txr_idx, adapter->num_tx_queues); for (i = 0; i < q_vector->txr_count; i++) { tx_ring = &(adapter->tx_ring[r_idx]); - tx_ring->total_bytes = 0; - tx_ring->total_packets = 0; ixgbevf_clean_tx_irq(adapter, tx_ring); r_idx = find_next_bit(q_vector->txr_idx, adapter->num_tx_queues, r_idx + 1); @@ -994,16 +992,6 @@ static irqreturn_t ixgbevf_msix_clean_rx(int irq, void *data) struct ixgbe_hw *hw = &adapter->hw; struct ixgbevf_ring *rx_ring; int r_idx; - int i; - - r_idx = find_first_bit(q_vector->rxr_idx, adapter->num_rx_queues); - for (i = 0; i < q_vector->rxr_count; i++) { - rx_ring = &(adapter->rx_ring[r_idx]); - rx_ring->total_bytes = 0; - rx_ring->total_packets = 0; - r_idx = find_next_bit(q_vector->rxr_idx, adapter->num_rx_queues, - r_idx + 1); - } if (!q_vector->rxr_count) return IRQ_HANDLED; -- cgit v0.10.2 From ef209f15980360f6945873df3cd710c5f62f2a3e Mon Sep 17 00:00:00 2001 From: Gao feng Date: Wed, 11 Jul 2012 21:50:15 +0000 Subject: net: cgroup: fix access the unallocated memory in netprio cgroup there are some out of bound accesses in netprio cgroup. now before accessing the dev->priomap.priomap array,we only check if the dev->priomap exist.and because we don't want to see additional bound checkings in fast path, so we should make sure that dev->priomap is null or array size of dev->priomap.priomap is equal to max_prioidx + 1; so in write_priomap logic,we should call extend_netdev_table when dev->priomap is null and dev->priomap.priomap_len < max_len. and in cgrp_create->update_netdev_tables logic,we should call extend_netdev_table only when dev->priomap exist and dev->priomap.priomap_len < max_len. and it's not needed to call update_netdev_tables in write_priomap, we can only allocate the net device's priomap which we change through net_prio.ifpriomap. this patch also add a return value for update_netdev_tables & extend_netdev_table, so when new_priomap is allocated failed, write_priomap will stop to access the priomap,and return -ENOMEM back to the userspace to tell the user what happend. Change From v3: 1. add rtnl protect when reading max_prioidx in write_priomap. 2. only call extend_netdev_table when map->priomap_len < max_len, this will make sure array size of dev->map->priomap always bigger than any prioidx. 3. add a function write_update_netdev_table to make codes clear. Change From v2: 1. protect extend_netdev_table by RTNL. 2. when extend_netdev_table failed,call dev_put to reduce device's refcount. Signed-off-by: Gao feng Cc: Neil Horman Cc: Eric Dumazet Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 3e953ea..b2e9caa 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -65,7 +65,7 @@ static void put_prioidx(u32 idx) spin_unlock_irqrestore(&prioidx_map_lock, flags); } -static void extend_netdev_table(struct net_device *dev, u32 new_len) +static int extend_netdev_table(struct net_device *dev, u32 new_len) { size_t new_size = sizeof(struct netprio_map) + ((sizeof(u32) * new_len)); @@ -77,7 +77,7 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len) if (!new_priomap) { pr_warn("Unable to alloc new priomap!\n"); - return; + return -ENOMEM; } for (i = 0; @@ -90,46 +90,79 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len) rcu_assign_pointer(dev->priomap, new_priomap); if (old_priomap) kfree_rcu(old_priomap, rcu); + return 0; } -static void update_netdev_tables(void) +static int write_update_netdev_table(struct net_device *dev) { + int ret = 0; + u32 max_len; + struct netprio_map *map; + + rtnl_lock(); + max_len = atomic_read(&max_prioidx) + 1; + map = rtnl_dereference(dev->priomap); + if (!map || map->priomap_len < max_len) + ret = extend_netdev_table(dev, max_len); + rtnl_unlock(); + + return ret; +} + +static int update_netdev_tables(void) +{ + int ret = 0; struct net_device *dev; - u32 max_len = atomic_read(&max_prioidx) + 1; + u32 max_len; struct netprio_map *map; rtnl_lock(); + max_len = atomic_read(&max_prioidx) + 1; for_each_netdev(&init_net, dev) { map = rtnl_dereference(dev->priomap); - if ((!map) || - (map->priomap_len < max_len)) - extend_netdev_table(dev, max_len); + /* + * don't allocate priomap if we didn't + * change net_prio.ifpriomap (map == NULL), + * this will speed up skb_update_prio. + */ + if (map && map->priomap_len < max_len) { + ret = extend_netdev_table(dev, max_len); + if (ret < 0) + break; + } } rtnl_unlock(); + return ret; } static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) { struct cgroup_netprio_state *cs; - int ret; + int ret = -EINVAL; cs = kzalloc(sizeof(*cs), GFP_KERNEL); if (!cs) return ERR_PTR(-ENOMEM); - if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) { - kfree(cs); - return ERR_PTR(-EINVAL); - } + if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) + goto out; ret = get_prioidx(&cs->prioidx); - if (ret != 0) { + if (ret < 0) { pr_warn("No space in priority index array\n"); - kfree(cs); - return ERR_PTR(ret); + goto out; + } + + ret = update_netdev_tables(); + if (ret < 0) { + put_prioidx(cs->prioidx); + goto out; } return &cs->css; +out: + kfree(cs); + return ERR_PTR(ret); } static void cgrp_destroy(struct cgroup *cgrp) @@ -221,13 +254,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft, if (!dev) goto out_free_devname; - update_netdev_tables(); - ret = 0; + ret = write_update_netdev_table(dev); + if (ret < 0) + goto out_put_dev; + rcu_read_lock(); map = rcu_dereference(dev->priomap); if (map) map->priomap[prioidx] = priority; rcu_read_unlock(); + +out_put_dev: dev_put(dev); out_free_devname: -- cgit v0.10.2 From 96f80d123eff05c3cd4701463786b87952a6c3ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sjur=20Br=C3=A6ndeland?= Date: Sun, 15 Jul 2012 10:10:14 +0000 Subject: caif: Fix access to freed pernet memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit unregister_netdevice_notifier() must be called before unregister_pernet_subsys() to avoid accessing already freed pernet memory. This fixes the following oops when doing rmmod: Call Trace: [] caif_device_notify+0x4d/0x5a0 [caif] [] unregister_netdevice_notifier+0xb9/0x100 [] caif_device_exit+0x1c/0x250 [caif] [] sys_delete_module+0x1a4/0x300 [] ? trace_hardirqs_on_caller+0x15d/0x1e0 [] ? trace_hardirqs_on_thunk+0x3a/0x3 [] system_call_fastpath+0x1a/0x1f RIP [] caif_get+0x51/0xb0 [caif] Signed-off-by: Sjur Brændeland Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 554b312..8c83c17 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -561,9 +561,9 @@ static int __init caif_device_init(void) static void __exit caif_device_exit(void) { - unregister_pernet_subsys(&caif_net_ops); unregister_netdevice_notifier(&caif_device_notifier); dev_remove_pack(&caif_packet_type); + unregister_pernet_subsys(&caif_net_ops); } module_init(caif_device_init); -- cgit v0.10.2 From 68653359beb59014b88e4f297e467730a1680996 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 13 Jul 2012 20:15:34 +0000 Subject: MAINTAINERS: reflect actual changes in IEEE 802.15.4 maintainership As the life flows, developers priorities shifts a bit. Reflect actual changes in the maintainership of IEEE 802.15.4 code: Sergey mostly stopped cared about this piece of code. Most of the work recently was done by Alexander, so put him to the MAINTAINERS file to reflect his status and to ease the life of respective patches. Also add new net/mac802154/ directory to the list of maintained files. Signed-off-by: Dmitry Eremin-Solenikov Cc: Alexander Smirnov Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index 03df1d1..3064a9c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3433,13 +3433,14 @@ S: Supported F: drivers/idle/i7300_idle.c IEEE 802.15.4 SUBSYSTEM +M: Alexander Smirnov M: Dmitry Eremin-Solenikov -M: Sergey Lapin L: linux-zigbee-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://apps.sourceforge.net/trac/linux-zigbee T: git git://git.kernel.org/pub/scm/linux/kernel/git/lowpan/lowpan.git S: Maintained F: net/ieee802154/ +F: net/mac802154/ F: drivers/ieee802154/ IIO SUBSYSTEM AND DRIVERS -- cgit v0.10.2 From ef764a13b8f4bc4b532a51bc37c35b3974831b29 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 13 Jul 2012 06:33:08 +0000 Subject: ax25: Fix missing break At least there seems to be no reason to disallow ROSE sockets when NETROM is loaded. Signed-off-by: Alan Cox Signed-off-by: David S. Miller diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 051f7ab..779095d 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -842,6 +842,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol, case AX25_P_NETROM: if (ax25_protocol_is_registered(AX25_P_NETROM)) return -ESOCKTNOSUPPORT; + break; #endif #ifdef CONFIG_ROSE_MODULE case AX25_P_ROSE: -- cgit v0.10.2 From 10cc1bdd5ef65f60f570b594c4c066d763c128fb Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 16 Jul 2012 23:44:48 +0000 Subject: ixgbevf: Fix panic when loading driver This patch addresses a kernel panic seen when setting up the interface. Specifically we see a NULL pointer dereference on the Tx descriptor cleanup path when enabling interrupts. This change corrects that so it cannot occur. Signed-off-by: Alexander Duyck Acked-by: Greg Rose Tested-by: Sibai Li Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 8b304a4..41e3225 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -201,6 +201,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_adapter *adapter, unsigned int i, eop, count = 0; unsigned int total_bytes = 0, total_packets = 0; + if (test_bit(__IXGBEVF_DOWN, &adapter->state)) + return true; + i = tx_ring->next_to_clean; eop = tx_ring->tx_buffer_info[i].next_to_watch; eop_desc = IXGBE_TX_DESC_ADV(*tx_ring, eop); -- cgit v0.10.2 From 9e33ce453f8ac8452649802bee1f410319408f4b Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Sat, 7 Jul 2012 18:26:10 +0800 Subject: ipvs: fix oops on NAT reply in br_nf context IPVS should not reset skb->nf_bridge in FORWARD hook by calling nf_reset for NAT replies. It triggers oops in br_nf_forward_finish. [ 579.781508] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [ 579.781669] IP: [] br_nf_forward_finish+0x58/0x112 [ 579.781792] PGD 218f9067 PUD 0 [ 579.781865] Oops: 0000 [#1] SMP [ 579.781945] CPU 0 [ 579.781983] Modules linked in: [ 579.782047] [ 579.782080] [ 579.782114] Pid: 4644, comm: qemu Tainted: G W 3.5.0-rc5-00006-g95e69f9 #282 Hewlett-Packard /30E8 [ 579.782300] RIP: 0010:[] [] br_nf_forward_finish+0x58/0x112 [ 579.782455] RSP: 0018:ffff88007b003a98 EFLAGS: 00010287 [ 579.782541] RAX: 0000000000000008 RBX: ffff8800762ead00 RCX: 000000000001670a [ 579.782653] RDX: 0000000000000000 RSI: 000000000000000a RDI: ffff8800762ead00 [ 579.782845] RBP: ffff88007b003ac8 R08: 0000000000016630 R09: ffff88007b003a90 [ 579.782957] R10: ffff88007b0038e8 R11: ffff88002da37540 R12: ffff88002da01a02 [ 579.783066] R13: ffff88002da01a80 R14: ffff88002d83c000 R15: ffff88002d82a000 [ 579.783177] FS: 0000000000000000(0000) GS:ffff88007b000000(0063) knlGS:00000000f62d1b70 [ 579.783306] CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b [ 579.783395] CR2: 0000000000000004 CR3: 00000000218fe000 CR4: 00000000000027f0 [ 579.783505] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 579.783684] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 579.783795] Process qemu (pid: 4644, threadinfo ffff880021b20000, task ffff880021aba760) [ 579.783919] Stack: [ 579.783959] ffff88007693cedc ffff8800762ead00 ffff88002da01a02 ffff8800762ead00 [ 579.784110] ffff88002da01a02 ffff88002da01a80 ffff88007b003b18 ffffffff817b26c7 [ 579.784260] ffff880080000000 ffffffff81ef59f0 ffff8800762ead00 ffffffff81ef58b0 [ 579.784477] Call Trace: [ 579.784523] [ 579.784562] [ 579.784603] [] br_nf_forward_ip+0x275/0x2c8 [ 579.784707] [] nf_iterate+0x47/0x7d [ 579.784797] [] ? br_dev_queue_push_xmit+0xae/0xae [ 579.784906] [] nf_hook_slow+0x6d/0x102 [ 579.784995] [] ? br_dev_queue_push_xmit+0xae/0xae [ 579.785175] [] ? _raw_write_unlock_bh+0x19/0x1b [ 579.785179] [] __br_forward+0x97/0xa2 [ 579.785179] [] br_handle_frame_finish+0x1a6/0x257 [ 579.785179] [] br_nf_pre_routing_finish+0x26d/0x2cb [ 579.785179] [] br_nf_pre_routing+0x55d/0x5c1 [ 579.785179] [] nf_iterate+0x47/0x7d [ 579.785179] [] ? br_handle_local_finish+0x44/0x44 [ 579.785179] [] nf_hook_slow+0x6d/0x102 [ 579.785179] [] ? br_handle_local_finish+0x44/0x44 [ 579.785179] [] ? sky2_poll+0xb35/0xb54 [ 579.785179] [] br_handle_frame+0x213/0x229 [ 579.785179] [] ? br_handle_frame_finish+0x257/0x257 [ 579.785179] [] __netif_receive_skb+0x2b4/0x3f1 [ 579.785179] [] process_backlog+0x99/0x1e2 [ 579.785179] [] net_rx_action+0xdf/0x242 [ 579.785179] [] __do_softirq+0xc1/0x1e0 [ 579.785179] [] ? trace_hardirqs_off_thunk+0x3a/0x6c [ 579.785179] [] call_softirq+0x1c/0x30 The steps to reproduce as follow, 1. On Host1, setup brige br0(192.168.1.106) 2. Boot a kvm guest(192.168.1.105) on Host1 and start httpd 3. Start IPVS service on Host1 ipvsadm -A -t 192.168.1.106:80 -s rr ipvsadm -a -t 192.168.1.106:80 -r 192.168.1.105:80 -m 4. Run apache benchmark on Host2(192.168.1.101) ab -n 1000 http://192.168.1.106/ ip_vs_reply4 ip_vs_out handle_response ip_vs_notrack nf_reset() { skb->nf_bridge = NULL; } Actually, IPVS wants in this case just to replace nfct with untracked version. So replace the nf_reset(skb) call in ip_vs_notrack() with a nf_conntrack_put(skb->nfct) call. Signed-off-by: Lin Ming Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d6146b4..95374d1 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1425,7 +1425,7 @@ static inline void ip_vs_notrack(struct sk_buff *skb) struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (!ct || !nf_ct_is_untracked(ct)) { - nf_reset(skb); + nf_conntrack_put(skb->nfct); skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); -- cgit v0.10.2 From 283283c4da91adc44b03519f434ee1e7e91d6fdb Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 7 Jul 2012 20:30:11 +0300 Subject: ipvs: fix oops in ip_vs_dst_event on rmmod After commit 39f618b4fd95ae243d940ec64c961009c74e3333 (3.4) "ipvs: reset ipvs pointer in netns" we can oops in ip_vs_dst_event on rmmod ip_vs because ip_vs_control_cleanup is called after the ipvs_core_ops subsys is unregistered and net->ipvs is NULL. Fix it by exiting early from ip_vs_dst_event if ipvs is NULL. It is safe because all services and dests for the net are already freed. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d43e3c1..84444dd 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1521,11 +1521,12 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = ptr; struct net *net = dev_net(dev); + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_service *svc; struct ip_vs_dest *dest; unsigned int idx; - if (event != NETDEV_UNREGISTER) + if (event != NETDEV_UNREGISTER || !ipvs) return NOTIFY_DONE; IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); EnterFunction(2); @@ -1551,7 +1552,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, } } - list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) { + list_for_each_entry(dest, &ipvs->dest_trash, n_list) { __ip_vs_dev_reset(dest, dev); } mutex_unlock(&__ip_vs_mutex); -- cgit v0.10.2