From 9c5a18a31b321f120efda412281bb9f610f84aa0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Jun 2015 21:35:44 +0200 Subject: cfg80211: wext: clear sinfo struct before calling driver Until recently, mac80211 overwrote all the statistics it could provide when getting called, but it now relies on the struct having been zeroed by the caller. This was always the case in nl80211, but wext used a static struct which could even cause values from one device leak to another. Using a static struct is OK (as even documented in a comment) since the whole usage of this function and its return value is always locked under RTNL. Not clearing the struct for calling the driver has always been wrong though, since drivers were free to only fill values they could report, so calling this for one device and then for another would always have leaked values from one to the other. Fix this by initializing the structure in question before the driver method call. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=99691 Cc: stable@vger.kernel.org Reported-by: Gerrit Renker Reported-by: Alexander Kaltsas Signed-off-by: Johannes Berg Signed-off-by: David S. Miller diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index fff1bef..fd68283 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1333,6 +1333,8 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); wdev_unlock(wdev); + memset(&sinfo, 0, sizeof(sinfo)); + if (rdev_get_station(rdev, dev, bssid, &sinfo)) return NULL; -- cgit v0.10.2 From 1b0ccfe54a6abd1bc4d7bdd1c33e61e2c58f72c7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 Jun 2015 15:29:31 -0700 Subject: Revert "ipv6: Fix protocol resubmission" This reverts commit 0243508edd317ff1fa63b495643a7c192fbfcd92. It introduces new regressions. Signed-off-by: David S. Miller diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 41a73da..f2e464e 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -212,13 +212,13 @@ static int ip6_input_finish(struct sock *sk, struct sk_buff *skb) */ rcu_read_lock(); +resubmit: idev = ip6_dst_idev(skb_dst(skb)); if (!pskb_pull(skb, skb_transport_offset(skb))) goto discard; nhoff = IP6CB(skb)->nhoff; nexthdr = skb_network_header(skb)[nhoff]; -resubmit: raw = raw6_local_deliver(skb, nexthdr); ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot) { @@ -246,12 +246,10 @@ resubmit: goto discard; ret = ipprot->handler(skb); - if (ret < 0) { - nexthdr = -ret; + if (ret > 0) goto resubmit; - } else if (ret == 0) { + else if (ret == 0) IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); - } } else { if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { -- cgit v0.10.2 From b3be5e3e726a6cc849f40c70c3ae525e4146e9df Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Tue, 9 Jun 2015 17:27:12 +0200 Subject: tipc: disconnect socket directly after probe failure If the TIPC connection timer expires in a probing state, a self abort message is supposed to be generated and delivered to the local socket. This is currently broken, and the abort message is actually sent out to the peer node with invalid addressing information. This will cause the link to enter a constant retransmission state and eventually reset. We fix this by removing the self-abort message creation and tear down connection immediately instead. Signed-off-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9074b5c..f485600 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2142,11 +2142,17 @@ static void tipc_sk_timeout(unsigned long data) peer_node = tsk_peer_node(tsk); if (tsk->probing_state == TIPC_CONN_PROBING) { - /* Previous probe not answered -> self abort */ - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, - TIPC_CONN_MSG, SHORT_H_SIZE, 0, - own_node, peer_node, tsk->portid, - peer_port, TIPC_ERR_NO_PORT); + if (!sock_owned_by_user(sk)) { + sk->sk_socket->state = SS_DISCONNECTING; + tsk->connected = 0; + tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), + tsk_peer_port(tsk)); + sk->sk_state_change(sk); + } else { + /* Try again later */ + sk_reset_timer(sk, &sk->sk_timer, (HZ / 20)); + } + } else { skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, peer_node, own_node, -- cgit v0.10.2 From 1a040eaca1a22f8da8285ceda6b5e4a2cb704867 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 9 Jun 2015 10:23:57 -0700 Subject: bridge: fix multicast router rlist endless loop Since the addition of sysfs multicast router support if one set multicast_router to "2" more than once, then the port would be added to the hlist every time and could end up linking to itself and thus causing an endless loop for rlist walkers. So to reproduce just do: echo 2 > multicast_router; echo 2 > multicast_router; in a bridge port and let some igmp traffic flow, for me it hangs up in br_multicast_flood(). Fix this by adding a check in br_multicast_add_router() if the port is already linked. The reason this didn't happen before the addition of multicast_router sysfs entries is because there's a !hlist_unhashed check that prevents it. Signed-off-by: Nikolay Aleksandrov Fixes: 0909e11758bd ("bridge: Add multicast_router sysfs entries") Acked-by: Herbert Xu Signed-off-by: David S. Miller diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 22fd041..ff667e1 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1167,6 +1167,9 @@ static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *p; struct hlist_node *slot = NULL; + if (!hlist_unhashed(&port->rlist)) + return; + hlist_for_each_entry(p, &br->router_list, rlist) { if ((unsigned long) port >= (unsigned long) p) break; @@ -1194,12 +1197,8 @@ static void br_multicast_mark_router(struct net_bridge *br, if (port->multicast_router != 1) return; - if (!hlist_unhashed(&port->rlist)) - goto timer; - br_multicast_add_router(br, port); -timer: mod_timer(&port->multicast_router_timer, now + br->multicast_querier_interval); } -- cgit v0.10.2 From 5d753610277862fd8050901f38b6571b9500cdb6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 10 Jun 2015 21:02:04 -0400 Subject: net, swap: Remove a warning and clarify why sk_mem_reclaim is required when deactivating swap Jeff Layton reported the following; [ 74.232485] ------------[ cut here ]------------ [ 74.233354] WARNING: CPU: 2 PID: 754 at net/core/sock.c:364 sk_clear_memalloc+0x51/0x80() [ 74.234790] Modules linked in: cts rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache xfs libcrc32c snd_hda_codec_generic snd_hda_intel snd_hda_controller snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device nfsd snd_pcm snd_timer snd e1000 ppdev parport_pc joydev parport pvpanic soundcore floppy serio_raw i2c_piix4 pcspkr nfs_acl lockd virtio_balloon acpi_cpufreq auth_rpcgss grace sunrpc qxl drm_kms_helper ttm drm virtio_console virtio_blk virtio_pci ata_generic virtio_ring pata_acpi virtio [ 74.243599] CPU: 2 PID: 754 Comm: swapoff Not tainted 4.1.0-rc6+ #5 [ 74.244635] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 74.245546] 0000000000000000 0000000079e69e31 ffff8800d066bde8 ffffffff8179263d [ 74.246786] 0000000000000000 0000000000000000 ffff8800d066be28 ffffffff8109e6fa [ 74.248175] 0000000000000000 ffff880118d48000 ffff8800d58f5c08 ffff880036e380a8 [ 74.249483] Call Trace: [ 74.249872] [] dump_stack+0x45/0x57 [ 74.250703] [] warn_slowpath_common+0x8a/0xc0 [ 74.251655] [] warn_slowpath_null+0x1a/0x20 [ 74.252585] [] sk_clear_memalloc+0x51/0x80 [ 74.253519] [] xs_disable_swap+0x42/0x80 [sunrpc] [ 74.254537] [] rpc_clnt_swap_deactivate+0x7e/0xc0 [sunrpc] [ 74.255610] [] nfs_swap_deactivate+0x27/0x30 [nfs] [ 74.256582] [] destroy_swap_extents+0x74/0x80 [ 74.257496] [] SyS_swapoff+0x222/0x5c0 [ 74.258318] [] ? syscall_trace_leave+0xc7/0x140 [ 74.259253] [] system_call_fastpath+0x12/0x71 [ 74.260158] ---[ end trace 2530722966429f10 ]--- The warning in question was unnecessary but with Jeff's series the rules are also clearer. This patch removes the warning and updates the comment to explain why sk_mem_reclaim() may still be called. [jlayton: remove if (sk->sk_forward_alloc) conditional. As Leon points out that it's not needed.] Cc: Leon Romanovsky Signed-off-by: Mel Gorman Signed-off-by: Jeff Layton Signed-off-by: David S. Miller diff --git a/net/core/sock.c b/net/core/sock.c index 292f422..469d603 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -354,15 +354,12 @@ void sk_clear_memalloc(struct sock *sk) /* * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward - * progress of swapping. However, if SOCK_MEMALLOC is cleared while - * it has rmem allocations there is a risk that the user of the - * socket cannot make forward progress due to exceeding the rmem - * limits. By rights, sk_clear_memalloc() should only be called - * on sockets being torn down but warn and reset the accounting if - * that assumption breaks. + * progress of swapping. SOCK_MEMALLOC may be cleared while + * it has rmem allocations due to the last swapfile being deactivated + * but there is a risk that the socket is unusable due to exceeding + * the rmem limits. Reclaim the reserves and obey rmem limits again. */ - if (WARN_ON(sk->sk_forward_alloc)) - sk_mem_reclaim(sk); + sk_mem_reclaim(sk); } EXPORT_SYMBOL_GPL(sk_clear_memalloc); -- cgit v0.10.2 From 6286e8285078ab02b215bba1d42a05ecfd864515 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Thu, 11 Jun 2015 11:52:54 +0530 Subject: enic: unlock napi busy poll before unmasking intr There is a small window between vnic_intr_unmask() and enic_poll_unlock_napi(). In this window if an irq occurs and napi is scheduled on different cpu, it tries to acquire enic_poll_lock_napi() and hits the following WARN_ON message. Fix is to unlock napi_poll before unmasking the interrupt. [ 781.121746] ------------[ cut here ]------------ [ 781.121789] WARNING: CPU: 1 PID: 0 at drivers/net/ethernet/cisco/enic/vnic_rq.h:228 enic_poll_msix_rq+0x36a/0x3c0 [enic]() [ 781.121834] Modules linked in: nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss oid_registry nfsv4 dns_resolver coretemp intel_rapl iosf_mbi x86_pkg_temp_thermal intel_powerclamp kvm_intel kvm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel mgag200 ttm drm_kms_helper joydev aes_x86_64 lrw drm gf128mul mousedev glue_helper sb_edac ablk_helper iTCO_wdt iTCO_vendor_support evdev ipmi_si syscopyarea sysfillrect sysimgblt i2c_algo_bit i2c_core edac_core lpc_ich mac_hid cryptd pcspkr ipmi_msghandler shpchp tpm_tis acpi_power_meter tpm wmi processor hwmon button ac sch_fq_codel nfs lockd grace sunrpc fscache hid_generic usbhid hid ehci_pci ehci_hcd sd_mod megaraid_sas usbcore scsi_mod usb_common enic crc32c_generic crc32c_intel btrfs xor raid6_pq ext4 crc16 mbcache jbd2 [ 781.122176] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.1.0-rc6-ARCH-00040-gc46a024-dirty #106 [ 781.122210] Hardware name: Cisco Systems Inc UCSB-B200-M4/UCSB-B200-M4, BIOS B200M4.2.2.2.23.061220140128 06/12/2014 [ 781.122252] 0000000000000000 bddbbc9d655ec96e ffff880277e43da8 ffffffff81583fe8 [ 781.122286] 0000000000000000 0000000000000000 ffff880277e43de8 ffffffff8107acfa [ 781.122319] ffff880272c01000 ffff880273f18000 ffff880273f1a100 0000000000000000 [ 781.122352] Call Trace: [ 781.122364] [] dump_stack+0x4f/0x7b [ 781.122399] [] warn_slowpath_common+0x8a/0xc0 [ 781.122425] [] warn_slowpath_null+0x1a/0x20 [ 781.122455] [] enic_poll_msix_rq+0x36a/0x3c0 [enic] [ 781.122487] [] net_rx_action+0x22a/0x370 [ 781.122512] [] __do_softirq+0xed/0x2d0 [ 781.122537] [] irq_exit+0x7e/0xa0 [ 781.122560] [] do_IRQ+0x64/0x100 [ 781.122582] [] common_interrupt+0x6e/0x6e [ 781.122605] [] ? cpu_startup_entry+0x121/0x480 [ 781.122638] [] ? cpu_startup_entry+0xec/0x480 [ 781.122667] [] ? clockevents_register_device+0x113/0x1f0 [ 781.122698] [] start_secondary+0x196/0x1e0 [ 781.122723] ---[ end trace cec2e9dd3af7b9db ]--- Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 204bd182..0e5a01d 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1407,6 +1407,7 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget) */ enic_calc_int_moderation(enic, &enic->rq[rq]); + enic_poll_unlock_napi(&enic->rq[rq]); if (work_done < work_to_do) { /* Some work done, but not enough to stay in polling, @@ -1418,7 +1419,6 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget) enic_set_int_moderation(enic, &enic->rq[rq]); vnic_intr_unmask(&enic->intr[intr]); } - enic_poll_unlock_napi(&enic->rq[rq]); return work_done; } -- cgit v0.10.2 From 19b596bda1c5400808635fde0d521c1f89a6c1a3 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Thu, 11 Jun 2015 11:52:55 +0530 Subject: enic: check return value for stat dump We do not check the return value of enic_dev_stats_dump(). If allocation fails, we will hit NULL pointer reference. Return only if memory allocation fails. For other failures, we return the previously recorded values. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 28d9ca6..68d47b1 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -131,8 +131,15 @@ static void enic_get_drvinfo(struct net_device *netdev, { struct enic *enic = netdev_priv(netdev); struct vnic_devcmd_fw_info *fw_info; + int err; - enic_dev_fw_info(enic, &fw_info); + err = enic_dev_fw_info(enic, &fw_info); + /* return only when pci_zalloc_consistent fails in vnic_dev_fw_info + * For other failures, like devcmd failure, we return previously + * recorded info. + */ + if (err == -ENOMEM) + return; strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver)); strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); @@ -181,8 +188,15 @@ static void enic_get_ethtool_stats(struct net_device *netdev, struct enic *enic = netdev_priv(netdev); struct vnic_stats *vstats; unsigned int i; - - enic_dev_stats_dump(enic, &vstats); + int err; + + err = enic_dev_stats_dump(enic, &vstats); + /* return only when pci_zalloc_consistent fails in vnic_dev_stats_dump + * For other failures, like devcmd failure, we return previously + * recorded stats. + */ + if (err == -ENOMEM) + return; for (i = 0; i < enic_n_tx_stats; i++) *(data++) = ((u64 *)&vstats->tx)[enic_tx_stats[i].index]; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 0e5a01d..eadae1b 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -615,8 +615,15 @@ static struct rtnl_link_stats64 *enic_get_stats(struct net_device *netdev, { struct enic *enic = netdev_priv(netdev); struct vnic_stats *stats; + int err; - enic_dev_stats_dump(enic, &stats); + err = enic_dev_stats_dump(enic, &stats); + /* return only when pci_zalloc_consistent fails in vnic_dev_stats_dump + * For other failures, like devcmd failure, we return previously + * recorded stats. + */ + if (err == -ENOMEM) + return net_stats; net_stats->tx_packets = stats->tx.tx_frames_ok; net_stats->tx_bytes = stats->tx.tx_bytes_ok; -- cgit v0.10.2 From 8b13b4e0bc884ba7dc8ee4de3ee915b7d30e7f78 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Thu, 11 Jun 2015 11:52:56 +0530 Subject: enic: fix memory leak in rq_clean When incoming packet qualifies for rx_copybreak, we copy the data to newly allocated skb. We do not free/unmap the original buffer. At this point driver assumes this buffer is unallocated. When enic_rq_alloc_buf() is called for buffer allocation, it checks if buf->os_buf is NULL. If its not NULL that means buffer can be re-used. When vnic_rq_clean() is called for freeing all rq buffers, and if the rx_copybreak reused buffer falls outside the used desc, we do not free the buffer. The following trace is observer when dma-debug is enabled. Fix is to walk through complete ring and clean if buffer is present. [ 40.555386] ------------[ cut here ]------------ [ 40.555396] WARNING: CPU: 0 PID: 491 at lib/dma-debug.c:971 dma_debug_device_change+0x188/0x1f0() [ 40.555400] pci 0000:06:00.0: DMA-API: device driver has pending DMA allocations while released from device [count=4] One of leaked entries details: [device address=0x00000000ff4cc040] [size=9018 bytes] [mapped with DMA_FROM_DEVICE] [mapped as single] [ 40.555402] Modules linked in: nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss oid_registry nfsv4 dns_resolver coretemp intel_rapl iosf_mbi x86_pkg_temp_thermal intel_powerclamp kvm_intel kvm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel aes_x86_64 lrw joydev mousedev gf128mul hid_generic glue_helper mgag200 usbhid ttm hid drm_kms_helper drm ablk_helper syscopyarea sysfillrect sysimgblt i2c_algo_bit i2c_core iTCO_wdt cryptd mac_hid evdev pcspkr sb_edac edac_core tpm_tis iTCO_vendor_support ipmi_si wmi tpm ipmi_msghandler shpchp lpc_ich processor acpi_power_meter hwmon button ac sch_fq_codel nfs lockd grace sunrpc fscache sd_mod ehci_pci ehci_hcd megaraid_sas usbcore scsi_mod usb_common enic(-) crc32c_generic crc32c_intel btrfs xor raid6_pq ext4 crc16 mbcache jbd2 [ 40.555467] CPU: 0 PID: 491 Comm: rmmod Not tainted 4.1.0-rc7-ARCH-01305-gf59b71f #118 [ 40.555469] Hardware name: Cisco Systems Inc UCSB-B200-M4/UCSB-B200-M4, BIOS B200M4.2.2.2.23.061220140128 06/12/2014 [ 40.555471] 0000000000000000 00000000e2f8a5b7 ffff880275f8bc48 ffffffff8158d6f0 [ 40.555474] 0000000000000000 ffff880275f8bca0 ffff880275f8bc88 ffffffff8107b04a [ 40.555477] ffff8802734e0000 0000000000000004 ffff8804763fb3c0 ffff88027600b650 [ 40.555480] Call Trace: [ 40.555488] [] dump_stack+0x4f/0x7b [ 40.555492] [] warn_slowpath_common+0x8a/0xc0 [ 40.555494] [] warn_slowpath_fmt+0x55/0x70 [ 40.555498] [] dma_debug_device_change+0x188/0x1f0 [ 40.555503] [] notifier_call_chain+0x4f/0x80 [ 40.555506] [] __blocking_notifier_call_chain+0x4b/0x70 [ 40.555510] [] blocking_notifier_call_chain+0x16/0x20 [ 40.555514] [] __device_release_driver+0xf6/0x120 [ 40.555518] [] driver_detach+0xc8/0xd0 [ 40.555523] [] bus_remove_driver+0x59/0xe0 [ 40.555527] [] driver_unregister+0x30/0x70 [ 40.555534] [] pci_unregister_driver+0x2d/0xa0 [ 40.555542] [] enic_cleanup_module+0x10/0x14e [enic] [ 40.555547] [] SyS_delete_module+0x1cf/0x280 [ 40.555551] [] ? ____fput+0xe/0x10 [ 40.555554] [] ? task_work_run+0xbc/0xf0 [ 40.555558] [] system_call_fastpath+0x12/0x71 [ 40.555561] ---[ end trace 4988cadc77c2b236 ]--- [ 40.555562] Mapped at: [ 40.555563] [] debug_dma_map_page+0x95/0x150 [ 40.555566] [] enic_rq_alloc_buf+0x1b8/0x360 [enic] [ 40.555570] [] enic_open+0xf8/0x820 [enic] [ 40.555574] [] __dev_open+0xce/0x150 [ 40.555579] [] __dev_change_flags+0xa1/0x170 Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.c b/drivers/net/ethernet/cisco/enic/vnic_rq.c index 36a2ed6..c4b2183 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_rq.c +++ b/drivers/net/ethernet/cisco/enic/vnic_rq.c @@ -188,16 +188,15 @@ void vnic_rq_clean(struct vnic_rq *rq, struct vnic_rq_buf *buf; u32 fetch_index; unsigned int count = rq->ring.desc_count; + int i; buf = rq->to_clean; - while (vnic_rq_desc_used(rq) > 0) { - + for (i = 0; i < rq->ring.desc_count; i++) { (*buf_clean)(rq, buf); - - buf = rq->to_clean = buf->next; - rq->ring.desc_avail++; + buf = buf->next; } + rq->ring.desc_avail = rq->ring.desc_count - 1; /* Use current fetch_index as the ring starting point */ fetch_index = ioread32(&rq->ctrl->fetch_index); -- cgit v0.10.2 From 58c98be137830d34b79024cc5dc95ef54fcd7ffe Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Thu, 11 Jun 2015 14:51:30 +0200 Subject: net: igb: fix the start time for periodic output signals When programming the start of a periodic output, the code wrongly places the seconds value into the "low" register and the nanoseconds into the "high" register. Even though this is backwards, it slipped through my testing, because the re-arming code in the interrupt service routine is correct, and the signal does appear starting with the second edge. This patch fixes the issue by programming the registers correctly. Signed-off-by: Richard Cochran Reviewed-by: Jacob Keller Acked-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index e3b9b63..c3a9392c 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -538,8 +538,8 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, igb->perout[i].start.tv_nsec = rq->perout.start.nsec; igb->perout[i].period.tv_sec = ts.tv_sec; igb->perout[i].period.tv_nsec = ts.tv_nsec; - wr32(trgttiml, rq->perout.start.sec); - wr32(trgttimh, rq->perout.start.nsec); + wr32(trgttimh, rq->perout.start.sec); + wr32(trgttiml, rq->perout.start.nsec); tsauxc |= tsauxc_mask; tsim |= tsim_mask; } else { -- cgit v0.10.2 From 0fae3bf018d97b210051c8797a49d66d31071847 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Thu, 11 Jun 2015 19:58:26 +0100 Subject: mpls: handle device renames for per-device sysctls If a device is renamed and the original name is subsequently reused for a new device, the following warning is generated: sysctl duplicate entry: /net/mpls/conf/veth0//input CPU: 3 PID: 1379 Comm: ip Not tainted 4.1.0-rc4+ #20 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 0000000000000000 0000000000000000 ffffffff81566aaf 0000000000000000 ffffffff81236279 ffff88002f7d7f00 0000000000000000 ffff88000db336d8 ffff88000db33698 0000000000000005 ffff88002e046000 ffff8800168c9280 Call Trace: [] ? dump_stack+0x40/0x50 [] ? __register_sysctl_table+0x289/0x5a0 [] ? mpls_dev_notify+0x1ff/0x300 [mpls_router] [] ? notifier_call_chain+0x4f/0x70 [] ? register_netdevice+0x2b2/0x480 [] ? veth_newlink+0x178/0x2d3 [veth] [] ? rtnl_newlink+0x73c/0x8e0 [] ? rtnl_newlink+0x16a/0x8e0 [] ? __kmalloc_reserve.isra.30+0x32/0x90 [] ? rtnetlink_rcv_msg+0x8d/0x250 [] ? __alloc_skb+0x47/0x1f0 [] ? __netlink_lookup+0xab/0xe0 [] ? rtnetlink_rcv+0x30/0x30 [] ? netlink_rcv_skb+0xb0/0xd0 [] ? rtnetlink_rcv+0x24/0x30 [] ? netlink_unicast+0x107/0x1a0 [] ? netlink_sendmsg+0x50e/0x630 [] ? sock_sendmsg+0x3c/0x50 [] ? ___sys_sendmsg+0x27b/0x290 [] ? mem_cgroup_try_charge+0x88/0x110 [] ? mem_cgroup_commit_charge+0x56/0xa0 [] ? do_filp_open+0x30/0xa0 [] ? __sys_sendmsg+0x3e/0x80 [] ? system_call_fastpath+0x16/0x75 Fix this by unregistering the previous sysctl table (registered for the path containing the original device name) and re-registering the table for the path containing the new device name. Fixes: 37bde79979c3 ("mpls: Per-device enabling of packet input") Reported-by: Scott Feldman Signed-off-by: Robert Shearman Signed-off-by: David S. Miller diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index bff427f..1f93a59 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -564,6 +564,17 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, case NETDEV_UNREGISTER: mpls_ifdown(dev); break; + case NETDEV_CHANGENAME: + mdev = mpls_dev_get(dev); + if (mdev) { + int err; + + mpls_dev_sysctl_unregister(mdev); + err = mpls_dev_sysctl_register(dev, mdev); + if (err) + return notifier_from_errno(err); + } + break; } return NOTIFY_OK; } -- cgit v0.10.2 From fb05e7a89f500cfc06ae277bdc911b281928995d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 11 Jun 2015 16:50:48 -0700 Subject: net: don't wait for order-3 page allocation We saw excessive direct memory compaction triggered by skb_page_frag_refill. This causes performance issues and add latency. Commit 5640f7685831e0 introduces the order-3 allocation. According to the changelog, the order-3 allocation isn't a must-have but to improve performance. But direct memory compaction has high overhead. The benefit of order-3 allocation can't compensate the overhead of direct memory compaction. This patch makes the order-3 page allocation atomic. If there is no memory pressure and memory isn't fragmented, the alloction will still success, so we don't sacrifice the order-3 benefit here. If the atomic allocation fails, direct memory compaction will not be triggered, skb_page_frag_refill will fallback to order-0 immediately, hence the direct memory compaction overhead is avoided. In the allocation failure case, kswapd is waken up and doing compaction, so chances are allocation could success next time. alloc_skb_with_frags is the same. The mellanox driver does similar thing, if this is accepted, we must fix the driver too. V3: fix the same issue in alloc_skb_with_frags as pointed out by Eric V2: make the changelog clearer Cc: Eric Dumazet Cc: Chris Mason Cc: Debabrata Banerjee Signed-off-by: Shaohua Li Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3cfff2a..41ec022 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4398,7 +4398,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, while (order) { if (npages >= 1 << order) { - page = alloc_pages(gfp_mask | + page = alloc_pages((gfp_mask & ~__GFP_WAIT) | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY, diff --git a/net/core/sock.c b/net/core/sock.c index 469d603..dc30dc5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1880,7 +1880,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) pfrag->offset = 0; if (SKB_FRAG_PAGE_ORDER) { - pfrag->page = alloc_pages(gfp | __GFP_COMP | + pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY, SKB_FRAG_PAGE_ORDER); if (likely(pfrag->page)) { -- cgit v0.10.2 From ae36806a622aea5ac79f279cfccc82144967b6e7 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 11 Jun 2015 14:49:46 -0300 Subject: sctp: allow authenticating DATA chunks that are bundled with COOKIE_ECHO Currently, we can ask to authenticate DATA chunks and we can send DATA chunks on the same packet as COOKIE_ECHO, but if you try to combine both, the DATA chunk will be sent unauthenticated and peer won't accept it, leading to a communication failure. This happens because even though the data was queued after it was requested to authenticate DATA chunks, it was also queued before we could know that remote peer can handle authenticating, so sctp_auth_send_cid() returns false. The fix is whenever we set up an active key, re-check send queue for chunks that now should be authenticated. As a result, such packet will now contain COOKIE_ECHO + AUTH + DATA chunks, in that order. Reported-by: Liu Wei Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/net/sctp/auth.c b/net/sctp/auth.c index fb7976a..4f15b7d 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -381,13 +381,14 @@ nomem: } -/* Public interface to creat the association shared key. +/* Public interface to create the association shared key. * See code above for the algorithm. */ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) { struct sctp_auth_bytes *secret; struct sctp_shared_key *ep_key; + struct sctp_chunk *chunk; /* If we don't support AUTH, or peer is not capable * we don't need to do anything. @@ -410,6 +411,14 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) sctp_auth_key_put(asoc->asoc_shared_key); asoc->asoc_shared_key = secret; + /* Update send queue in case any chunk already in there now + * needs authenticating + */ + list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) { + if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) + chunk->auth = 1; + } + return 0; } -- cgit v0.10.2 From b07d496177cd3bc4b70fb8a5e85ede24cb403a11 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sat, 13 Jun 2015 00:23:21 +0900 Subject: Doc: networking: Fix URL for wiki.wireshark.org in udplite.txt This patch fix URL (http to https) for wiki.wireshark.org. Signed-off-by: Masanari Iida Signed-off-by: David S. Miller diff --git a/Documentation/networking/udplite.txt b/Documentation/networking/udplite.txt index d727a38..53a7268 100644 --- a/Documentation/networking/udplite.txt +++ b/Documentation/networking/udplite.txt @@ -20,7 +20,7 @@ files/UDP-Lite-HOWTO.txt o The Wireshark UDP-Lite WiKi (with capture files): - http://wiki.wireshark.org/Lightweight_User_Datagram_Protocol + https://wiki.wireshark.org/Lightweight_User_Datagram_Protocol o The Protocol Spec, RFC 3828, http://www.ietf.org/rfc/rfc3828.txt -- cgit v0.10.2