From 68c11e98ef6748ddb63865799b12fc45abb3755d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 2 Apr 2015 10:58:24 +0300 Subject: xfrm: fix xfrm_input/xfrm_tunnel_check oops https://bugzilla.kernel.org/show_bug.cgi?id=95211 Commit 70be6c91c86596ad2b60c73587880b47df170a41 ("xfrm: Add xfrm_tunnel_skb_cb to the skb common buffer") added check which dereferences ->outer_mode too early but larval SAs don't have this pointer set (yet). So check for tunnel stuff later. Mike Noordermeer reported this bug and patiently applied all the debugging. Technically this is remote-oops-in-interrupt-context type of thing. BUG: unable to handle kernel NULL pointer dereference at 0000000000000034 IP: [] xfrm_input+0x3c2/0x5a0 ... [] ? xfrm4_esp_rcv+0x36/0x70 [] ? ip_local_deliver_finish+0x9a/0x200 [] ? __netif_receive_skb_core+0x6f3/0x8f0 ... RIP [] xfrm_input+0x3c2/0x5a0 Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Alexey Dobriyan Signed-off-by: Steffen Klassert diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 85d1d47..526c4fe 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -238,11 +238,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp->xvec[skb->sp->len++] = x; - if (xfrm_tunnel_check(skb, x, family)) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); - goto drop; - } - spin_lock(&x->lock); if (unlikely(x->km.state == XFRM_STATE_ACQ)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); @@ -271,6 +266,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) spin_unlock(&x->lock); + if (xfrm_tunnel_check(skb, x, family)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); + goto drop; + } + seq_hi = htonl(xfrm_replay_seqhi(x, seq)); XFRM_SKB_CB(skb)->seq.input.low = seq; -- cgit v0.10.2 From 092a29a40bab8bb4530bb3e58a0597001cdecdef Mon Sep 17 00:00:00 2001 From: Yao Xiwei Date: Thu, 2 Apr 2015 17:31:17 +0200 Subject: vti6: fix uninit when using x-netns When the kernel deleted a vti6 interface, this interface was not removed from the tunnels list. Thus, when the ip6_vti module was removed, this old interface was found and the kernel tried to delete it again. This was leading to a kernel panic. Fixes: 61220ab34948 ("vti6: Enable namespace changing") Signed-off-by: Yao Xiwei Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 5fb9e21..a4ac850 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -288,8 +288,7 @@ static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p, static void vti6_dev_uninit(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct net *net = dev_net(dev); - struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct vti6_net *ip6n = net_generic(t->net, vti6_net_id); if (dev == ip6n->fb_tnl_dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); -- cgit v0.10.2 From 0ad2a8365975d6794d79a4e4dde60fcc036692c7 Mon Sep 17 00:00:00 2001 From: "Beshay, Joseph" Date: Mon, 6 Apr 2015 18:00:56 +0000 Subject: netem: Fixes byte backlog accounting for the first of two chained netem instances Fixes byte backlog accounting for the first of two chained netem instances. Bytes backlog reported now corresponds to the number of queued packets. When two netem instances are chained, for instance to apply rate and queue limitation followed by packet delay, the number of backlogged bytes reported by the first netem instance is wrong. It reports the sum of bytes in the queues of the first and second netem. The first netem reports the correct number of backlogged packets but not bytes. This is shown in the example below. Consider a chain of two netem schedulers created using the following commands: $ tc -s qdisc replace dev veth2 root handle 1:0 netem rate 10000kbit limit 100 $ tc -s qdisc add dev veth2 parent 1:0 handle 2: netem delay 50ms Start an iperf session to send packets out on the specified interface and monitor the backlog using tc: $ tc -s qdisc show dev veth2 Output using unpatched netem: qdisc netem 1: root refcnt 2 limit 100 rate 10000Kbit Sent 98422639 bytes 65434 pkt (dropped 123, overlimits 0 requeues 0) backlog 172694b 73p requeues 0 qdisc netem 2: parent 1: limit 1000 delay 50.0ms Sent 98422639 bytes 65434 pkt (dropped 0, overlimits 0 requeues 0) backlog 63588b 42p requeues 0 The interface used to produce this output has an MTU of 1500. The output for backlogged bytes behind netem 1 is 172694b. This value is not correct. Consider the total number of sent bytes and packets. By dividing the number of sent bytes by the number of sent packets, we get an average packet size of ~=1504. If we divide the number of backlogged bytes by packets, we get ~=2365. This is due to the first netem incorrectly counting the 63588b which are in netem 2's queue as being in its own queue. To verify this is the case, we subtract them from the reported value and divide by the number of packets as follows: 172694 - 63588 = 109106 bytes actualled backlogged in netem 1 109106 / 73 packets ~= 1494 bytes (which matches our MTU) The root cause is that the byte accounting is not done at the same time with packet accounting. The solution is to update the backlog value every time the packet queue is updated. Signed-off-by: Joseph D Beshay Acked-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 179f1c8..956ead2 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -560,8 +560,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) tfifo_dequeue: skb = __skb_dequeue(&sch->q); if (skb) { -deliver: qdisc_qstats_backlog_dec(sch, skb); +deliver: qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); return skb; @@ -578,6 +578,7 @@ deliver: rb_erase(p, &q->t_root); sch->q.qlen--; + qdisc_qstats_backlog_dec(sch, skb); skb->next = NULL; skb->prev = NULL; skb->tstamp = netem_skb_cb(skb)->tstamp_save; -- cgit v0.10.2 From ebe96e641dee2cbd135ee802ae7e40c361640088 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Wed, 8 Apr 2015 12:33:45 -0400 Subject: RDS: Documentation: Document AF_RDS, PF_RDS and SOL_RDS correctly. AF_RDS, PF_RDS and SOL_RDS are available in header files, and there is no need to get their values from /proc. Document this correctly. Fixes: 0c5f9b8830aa ("RDS: Documentation") Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller diff --git a/Documentation/networking/rds.txt b/Documentation/networking/rds.txt index c67077c..e1a3d59 100644 --- a/Documentation/networking/rds.txt +++ b/Documentation/networking/rds.txt @@ -62,11 +62,10 @@ Socket Interface ================ AF_RDS, PF_RDS, SOL_RDS - These constants haven't been assigned yet, because RDS isn't in - mainline yet. Currently, the kernel module assigns some constant - and publishes it to user space through two sysctl files - /proc/sys/net/rds/pf_rds - /proc/sys/net/rds/sol_rds + AF_RDS and PF_RDS are the domain type to be used with socket(2) + to create RDS sockets. SOL_RDS is the socket-level to be used + with setsockopt(2) and getsockopt(2) for RDS specific socket + options. fd = socket(PF_RDS, SOCK_SEQPACKET, 0); This creates a new, unbound RDS socket. -- cgit v0.10.2 From 1789b2c077f6d6c82b04cfe49a0fec020dc42488 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Wed, 8 Apr 2015 12:33:46 -0400 Subject: RDS: only use passive connections when addresses match Passive connections were added for the case where one loopback IB connection between identical addresses needs another connection to store the second QP. Unfortunately, they were also created in the case where the addesses differ and we already have both QPs. This lead to a message reordering bug. - two different IB interfaces and addresses on a machine: A B - traffic is sent from A to B - connection from A-B is created, connect request sent - listening accepts connect request, B-A is created - traffic flows, next_rx is incremented - unacked messages exist on the retrans list - connection A-B is shut down, new connect request sent - listen sees existing loopback B-A, creates new passive B-A - retrans messages are sent and delivered because of 0 next_rx The problem is that the second connection request saw the previously existing parent connection. Instead of using it, and using the existing next_rx_seq state for the traffic between those IPs, it mistakenly thought that it had to create a passive connection. We fix this by only using passive connections in the special case where laddr and faddr match. In this case we'll only ever have one parent sending connection requests and one passive connection created as the listening path sees the existing parent connection which initiated the request. Original patch by Zach Brown Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller diff --git a/net/rds/connection.c b/net/rds/connection.c index 378c3a6..7952a5b 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -130,7 +130,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, rcu_read_lock(); conn = rds_conn_lookup(head, laddr, faddr, trans); if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && - !is_outgoing) { + laddr == faddr && !is_outgoing) { /* This is a looped back IB connection, and we're * called by the code handling the incoming connect. * We need a second connection object into which we -- cgit v0.10.2 From 443be0e5affe3acb6dd81e7402951677e0a0eb35 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Wed, 8 Apr 2015 12:33:47 -0400 Subject: RDS: make sure not to loop forever inside rds_send_xmit If a determined set of concurrent senders keep the send queue full, we can loop forever inside rds_send_xmit. This fix has two parts. First we are dropping out of the while(1) loop after we've processed a large batch of messages. Second we add a generation number that gets bumped each time the xmit bit lock is acquired. If someone else has jumped in and made progress in the queue, we skip our goto restart. Original patch by Chris Mason. Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller diff --git a/net/rds/connection.c b/net/rds/connection.c index 7952a5b..14f04139 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -193,6 +193,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, } atomic_set(&conn->c_state, RDS_CONN_DOWN); + conn->c_send_gen = 0; conn->c_reconnect_jiffies = 0; INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker); INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker); diff --git a/net/rds/rds.h b/net/rds/rds.h index c2a5eef..02d8fd5 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -110,6 +110,7 @@ struct rds_connection { void *c_transport_data; atomic_t c_state; + unsigned long c_send_gen; unsigned long c_flags; unsigned long c_reconnect_jiffies; struct delayed_work c_send_w; diff --git a/net/rds/send.c b/net/rds/send.c index 42f65d4..49f77ef 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -140,8 +140,11 @@ int rds_send_xmit(struct rds_connection *conn) struct scatterlist *sg; int ret = 0; LIST_HEAD(to_be_dropped); + int batch_count; + unsigned long send_gen = 0; restart: + batch_count = 0; /* * sendmsg calls here after having queued its message on the send @@ -157,6 +160,17 @@ restart: } /* + * we record the send generation after doing the xmit acquire. + * if someone else manages to jump in and do some work, we'll use + * this to avoid a goto restart farther down. + * + * The acquire_in_xmit() check above ensures that only one + * caller can increment c_send_gen at any time. + */ + conn->c_send_gen++; + send_gen = conn->c_send_gen; + + /* * rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT, * we do the opposite to avoid races. */ @@ -202,6 +216,16 @@ restart: if (!rm) { unsigned int len; + batch_count++; + + /* we want to process as big a batch as we can, but + * we also want to avoid softlockups. If we've been + * through a lot of messages, lets back off and see + * if anyone else jumps in + */ + if (batch_count >= 1024) + goto over_batch; + spin_lock_irqsave(&conn->c_lock, flags); if (!list_empty(&conn->c_send_queue)) { @@ -357,9 +381,9 @@ restart: } } +over_batch: if (conn->c_trans->xmit_complete) conn->c_trans->xmit_complete(conn); - release_in_xmit(conn); /* Nuke any messages we decided not to retransmit. */ @@ -380,10 +404,15 @@ restart: * If the transport cannot continue (i.e ret != 0), then it must * call us when more room is available, such as from the tx * completion handler. + * + * We have an extra generation check here so that if someone manages + * to jump in after our release_in_xmit, we'll see that they have done + * some work and we will skip our goto */ if (ret == 0) { smp_mb(); - if (!list_empty(&conn->c_send_queue)) { + if (!list_empty(&conn->c_send_queue) && + send_gen == conn->c_send_gen) { rds_stats_inc(s_send_lock_queue_raced); goto restart; } -- cgit v0.10.2 From 3bca4cf6022ee2ff1ff2142bed28a6fca00d3a44 Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Wed, 8 Apr 2015 12:15:18 +0200 Subject: net: phy: broadcom: Add BCM54616S phy entry Signed-off-by: Alessio Igor Bogani Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 16adbc4..8fadaa1 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -68,8 +68,8 @@ config SMSC_PHY config BROADCOM_PHY tristate "Drivers for Broadcom PHYs" ---help--- - Currently supports the BCM5411, BCM5421, BCM5461, BCM5464, BCM5481 - and BCM5482 PHYs. + Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464, + BCM5481 and BCM5482 PHYs. config BCM63XX_PHY tristate "Drivers for Broadcom 63xx SOCs internal PHY" diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index a52afb2..9c71295 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -549,6 +549,19 @@ static struct phy_driver broadcom_drivers[] = { .config_intr = bcm54xx_config_intr, .driver = { .owner = THIS_MODULE }, }, { + .phy_id = PHY_ID_BCM54616S, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM54616S", + .features = PHY_GBIT_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = bcm54xx_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = bcm54xx_ack_interrupt, + .config_intr = bcm54xx_config_intr, + .driver = { .owner = THIS_MODULE }, +}, { .phy_id = PHY_ID_BCM5464, .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5464", @@ -660,6 +673,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { { PHY_ID_BCM5411, 0xfffffff0 }, { PHY_ID_BCM5421, 0xfffffff0 }, { PHY_ID_BCM5461, 0xfffffff0 }, + { PHY_ID_BCM54616S, 0xfffffff0 }, { PHY_ID_BCM5464, 0xfffffff0 }, { PHY_ID_BCM5482, 0xfffffff0 }, { PHY_ID_BCM5482, 0xfffffff0 }, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 7ccd928..1c9920b 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -11,6 +11,7 @@ #define PHY_ID_BCM5421 0x002060e0 #define PHY_ID_BCM5464 0x002060b0 #define PHY_ID_BCM5461 0x002060c0 +#define PHY_ID_BCM54616S 0x03625d10 #define PHY_ID_BCM57780 0x03625d90 #define PHY_ID_BCM7250 0xae025280 -- cgit v0.10.2 From 5eeb2922152042b78eccfb6cf70458019296654f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 8 Apr 2015 06:04:31 -0700 Subject: fou: Don't use const __read_mostly const __read_mostly is a senseless combination. If something is already const it cannot be __read_mostly. Remove the bogus __read_mostly in the fou driver. This fixes section conflicts with LTO. Signed-off-by: Andi Kleen Signed-off-by: David S. Miller diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index ff069f6..335e752 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -771,12 +771,12 @@ EXPORT_SYMBOL(gue_build_header); #ifdef CONFIG_NET_FOU_IP_TUNNELS -static const struct ip_tunnel_encap_ops __read_mostly fou_iptun_ops = { +static const struct ip_tunnel_encap_ops fou_iptun_ops = { .encap_hlen = fou_encap_hlen, .build_header = fou_build_header, }; -static const struct ip_tunnel_encap_ops __read_mostly gue_iptun_ops = { +static const struct ip_tunnel_encap_ops gue_iptun_ops = { .encap_hlen = gue_encap_hlen, .build_header = gue_build_header, }; -- cgit v0.10.2 From 69304cc986bdcd7a09df179aeda3736f7e1544a5 Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Wed, 8 Apr 2015 16:59:48 -0500 Subject: be2net: Fix a bug in Rx buffer posting The numPosted field in the ERX Doorbell register is 8-bits wide. So the max buffers that we can post at a time is 255 and not 256 which we are doing currently. Signed-off-by: Ajit Khaparde Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 27b9fe9..204ec43 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -97,6 +97,7 @@ #define BE_NAPI_WEIGHT 64 #define MAX_RX_POST BE_NAPI_WEIGHT /* Frags posted at a time */ #define RX_FRAGS_REFILL_WM (RX_Q_LEN - MAX_RX_POST) +#define MAX_NUM_POST_ERX_DB 255u #define MAX_VFS 30 /* Max VFs supported by BE3 FW */ #define FW_VER_LEN 32 diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index e6b790f..ad2b509 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2032,7 +2032,7 @@ static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed) if (rxo->rx_post_starved) rxo->rx_post_starved = false; do { - notify = min(256u, posted); + notify = min(MAX_NUM_POST_ERX_DB, posted); be_rxq_notify(adapter, rxq->id, notify); posted -= notify; } while (posted); -- cgit v0.10.2 From 27015f8c17578e14b3c6cc098b915b0f8db3ac36 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Wed, 8 Apr 2015 18:41:34 -0700 Subject: stmmac: devm_reset_control_get can return PROBE_DEFER In socfpga_dwmac_parse_data forward error code from devm_reset_control_get. This gives the driver another chance to laod if altr,rst-mgr is loaded after the network driver. Signed-off-by: Phil Reid Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index e97074c..1cacea9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -91,7 +91,7 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device * STMMAC_RESOURCE_NAME); if (IS_ERR(dwmac->stmmac_rst)) { dev_info(dev, "Could not get reset control!\n"); - return -EINVAL; + return PTR_ERR(dwmac->stmmac_rst); } dwmac->interface = of_get_phy_mode(np); -- cgit v0.10.2 From 11f17ef3154fc8a7876a9ada1d1b80d41106960a Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 9 Apr 2015 10:09:04 +0200 Subject: usbnet: rename work handler "kevent" is an extremely generic name that causes trouble if debugging for work queues is used. So change it to something clear. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 777757a..733f4fe 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1072,7 +1072,7 @@ static void __handle_set_rx_mode(struct usbnet *dev) * especially now that control transfers can be queued. */ static void -kevent (struct work_struct *work) +usbnet_deferred_kevent (struct work_struct *work) { struct usbnet *dev = container_of(work, struct usbnet, kevent); @@ -1626,7 +1626,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) skb_queue_head_init(&dev->rxq_pause); dev->bh.func = usbnet_bh; dev->bh.data = (unsigned long) dev; - INIT_WORK (&dev->kevent, kevent); + INIT_WORK (&dev->kevent, usbnet_deferred_kevent); init_usb_anchor(&dev->deferred); dev->delay.function = usbnet_bh; dev->delay.data = (unsigned long) dev; -- cgit v0.10.2 From b736a623bd099cdf5521ca9bd03559f3bc7fa31c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Thu, 9 Apr 2015 11:19:14 -0700 Subject: udptunnels: Call handle_offloads after inserting vlan tag. handle_offloads() calls skb_reset_inner_headers() to store the layer pointers to the encapsulated packet. However, we currently push the vlag tag (if there is one) onto the packet afterwards. This changes the MAC header for the encapsulated packet but it is not reflected in skb->inner_mac_header, which breaks GSO and drivers which attempt to use this for encapsulation offloads. Fixes: 1eaa8178 ("vxlan: Add tx-vlan offload support.") Signed-off-by: Jesse Gross Signed-off-by: David S. Miller diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f8528a4..fceb637 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1713,12 +1713,6 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, } } - skb = iptunnel_handle_offloads(skb, udp_sum, type); - if (IS_ERR(skb)) { - err = -EINVAL; - goto err; - } - skb_scrub_packet(skb, xnet); min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len @@ -1738,6 +1732,12 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, goto err; } + skb = iptunnel_handle_offloads(skb, udp_sum, type); + if (IS_ERR(skb)) { + err = -EINVAL; + goto err; + } + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_HF_VNI); vxh->vx_vni = md->vni; @@ -1798,10 +1798,6 @@ int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb, } } - skb = iptunnel_handle_offloads(skb, udp_sum, type); - if (IS_ERR(skb)) - return PTR_ERR(skb); - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + VXLAN_HLEN + sizeof(struct iphdr) + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); @@ -1817,6 +1813,10 @@ int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb, if (WARN_ON(!skb)) return -ENOMEM; + skb = iptunnel_handle_offloads(skb, udp_sum, type); + if (IS_ERR(skb)) + return PTR_ERR(skb); + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_HF_VNI); vxh->vx_vni = md->vni; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 5a4828b..a566a2e 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -113,10 +113,6 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, int min_headroom; int err; - skb = udp_tunnel_handle_offloads(skb, csum); - if (IS_ERR(skb)) - return PTR_ERR(skb); - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); @@ -131,6 +127,10 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, if (unlikely(!skb)) return -ENOMEM; + skb = udp_tunnel_handle_offloads(skb, csum); + if (IS_ERR(skb)) + return PTR_ERR(skb); + gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); -- cgit v0.10.2 From b50edd7812852d989f2ef09dcfc729690f54a42d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Apr 2015 13:31:56 -0700 Subject: tcp: tcp_make_synack() should clear skb->tstamp I noticed tcpdump was giving funky timestamps for locally generated SYNACK messages on loopback interface. 11:42:46.938990 IP 127.0.0.1.48245 > 127.0.0.2.23850: S 945476042:945476042(0) win 43690 20:28:58.502209 IP 127.0.0.2.23850 > 127.0.0.1.48245: S 3160535375:3160535375(0) ack 945476043 win 43690 This is because we need to clear skb->tstamp before entering lower stack, otherwise net_timestamp_check() does not set skb->tstamp. Fixes: 7faee5c0d514 ("tcp: remove TCP_SKB_CB(skb)->when") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1db253e..d520492 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2929,6 +2929,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, } #endif + /* Do not fool tcpdump (if any), clean our debris */ + skb->tstamp.tv64 = 0; return skb; } EXPORT_SYMBOL(tcp_make_synack); -- cgit v0.10.2