From bbf178e0a0a323d896611905718db112b1ab1807 Mon Sep 17 00:00:00 2001 From: "xypron.glpk@gmx.de" Date: Wed, 18 May 2016 20:40:51 +0200 Subject: net: pegasus: simplify logical constraint If !count is true, count < 4 is also true. Signed-off-by: Heinrich Schuchardt Signed-off-by: David S. Miller diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 36cd7f0..9bbe0161 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -473,7 +473,7 @@ static void read_bulk_callback(struct urb *urb) goto goon; } - if (!count || count < 4) + if (count < 4) goto goon; rx_status = buf[count - 2]; -- cgit v0.10.2 From c685293aa379b9d17e44fcc2f96a402b7648e838 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 May 2016 17:42:13 -0700 Subject: net/atm: sk_err_soft must be positive The sk_err and sk_err_soft fields are positive errno values and userspace applications rely on this when using getsockopt(SO_ERROR). ATM code places an -errno into sk_err_soft in sigd_send() and returns it from svc_addparty()/svc_dropparty(). Although I am not familiar with ATM code I came to this conclusion because: 1. sigd_send() msg->type cases as_okay and as_error both have: sk->sk_err = -msg->reply; while the as_addparty and as_dropparty cases have: sk->sk_err_soft = msg->reply; This is the source of the inconsistency. 2. svc_addparty() returns an -errno and assumes sk_err_soft is also an -errno: if (flags & O_NONBLOCK) { error = -EINPROGRESS; goto out; } ... error = xchg(&sk->sk_err_soft, 0); out: release_sock(sk); return error; This shows that sk_err_soft is indeed being treated as an -errno. This patch ensures that sk_err_soft is always a positive errno. Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 4fd6af4..adb6e3d 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -124,7 +124,7 @@ as_indicate_complete: break; case as_addparty: case as_dropparty: - sk->sk_err_soft = msg->reply; + sk->sk_err_soft = -msg->reply; /* < 0 failure, otherwise ep_ref */ clear_bit(ATM_VF_WAITING, &vcc->flags); break; diff --git a/net/atm/svc.c b/net/atm/svc.c index 3fa0a9e..878563a 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -546,7 +546,7 @@ static int svc_addparty(struct socket *sock, struct sockaddr *sockaddr, schedule(); } finish_wait(sk_sleep(sk), &wait); - error = xchg(&sk->sk_err_soft, 0); + error = -xchg(&sk->sk_err_soft, 0); out: release_sock(sk); return error; @@ -573,7 +573,7 @@ static int svc_dropparty(struct socket *sock, int ep_ref) error = -EUNATCH; goto out; } - error = xchg(&sk->sk_err_soft, 0); + error = -xchg(&sk->sk_err_soft, 0); out: release_sock(sk); return error; -- cgit v0.10.2 From 8da07a393f48864a87c61bb1dfbee640d9b97fd6 Mon Sep 17 00:00:00 2001 From: "xypron.glpk@gmx.de" Date: Thu, 19 May 2016 21:20:55 +0200 Subject: net: hns: avoid null pointer dereference In the statement assert(priv || priv->ae_handle); the right side of || is only evaluated if priv is null. v2: As suggested by David Leight and David Miller the assert statements are removed. Signed-off-by: Heinrich Schuchardt Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 3d746c8..67a648c 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -46,7 +46,6 @@ static u32 hns_nic_get_link(struct net_device *net_dev) u32 link_stat = priv->link; struct hnae_handle *h; - assert(priv && priv->ae_handle); h = priv->ae_handle; if (priv->phy) { @@ -646,8 +645,6 @@ static void hns_nic_get_drvinfo(struct net_device *net_dev, { struct hns_nic_priv *priv = netdev_priv(net_dev); - assert(priv); - strncpy(drvinfo->version, HNAE_DRIVER_VERSION, sizeof(drvinfo->version)); drvinfo->version[sizeof(drvinfo->version) - 1] = '\0'; @@ -720,8 +717,6 @@ static int hns_set_pauseparam(struct net_device *net_dev, struct hnae_handle *h; struct hnae_ae_ops *ops; - assert(priv || priv->ae_handle); - h = priv->ae_handle; ops = h->dev->ops; @@ -780,8 +775,6 @@ static int hns_set_coalesce(struct net_device *net_dev, struct hnae_ae_ops *ops; int ret; - assert(priv || priv->ae_handle); - ops = priv->ae_handle->dev->ops; if (ec->tx_coalesce_usecs != ec->rx_coalesce_usecs) @@ -1111,8 +1104,6 @@ void hns_get_regs(struct net_device *net_dev, struct ethtool_regs *cmd, struct hns_nic_priv *priv = netdev_priv(net_dev); struct hnae_ae_ops *ops; - assert(priv || priv->ae_handle); - ops = priv->ae_handle->dev->ops; cmd->version = HNS_CHIP_VERSION; @@ -1135,8 +1126,6 @@ static int hns_get_regs_len(struct net_device *net_dev) struct hns_nic_priv *priv = netdev_priv(net_dev); struct hnae_ae_ops *ops; - assert(priv || priv->ae_handle); - ops = priv->ae_handle->dev->ops; if (!ops->get_regs_len) { netdev_err(net_dev, "ops->get_regs_len is null!\n"); -- cgit v0.10.2 From 2ece068e1b1d6e76ecd07e04b027013e931287ff Mon Sep 17 00:00:00 2001 From: Muhammad Falak R Wani Date: Fri, 20 May 2016 17:51:02 +0530 Subject: ptp: use memdup_user(). Use memdup_user to duplicate a memory region from user-space to kernel-space, instead of open coding using kmalloc & copy_from_user. Signed-off-by: Muhammad Falak R Wani Acked-by: Richard Cochran Signed-off-by: David S. Miller diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 579fd65..0b1ac6b 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -208,14 +208,9 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) break; case PTP_SYS_OFFSET: - sysoff = kmalloc(sizeof(*sysoff), GFP_KERNEL); - if (!sysoff) { - err = -ENOMEM; - break; - } - if (copy_from_user(sysoff, (void __user *)arg, - sizeof(*sysoff))) { - err = -EFAULT; + sysoff = memdup_user((void __user *)arg, sizeof(*sysoff)); + if (IS_ERR(sysoff)) { + err = PTR_ERR(sysoff); break; } if (sysoff->n_samples > PTP_MAX_SAMPLES) { -- cgit v0.10.2 From 049bbf589ec651685205bd8ce73221fdd62345cf Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Fri, 20 May 2016 13:21:10 -0300 Subject: ipv4: Fix non-initialized TTL when CONFIG_SYSCTL=n Commit fa50d974d104 ("ipv4: Namespaceify ip_default_ttl sysctl knob") moves the default TTL assignment, and as side-effect IPv4 TTL now has a default value only if sysctl support is enabled (CONFIG_SYSCTL=y). The sysctl_ip_default_ttl is fundamental for IP to work properly, as it provides the TTL to be used as default. 
The default TTL may be used in ip_select_ttl, through the following flow: ip_select_ttl ip4_dst_hoplimit net->ipv4.sysctl_ip_default_ttl This commit fixes the issue by assigning net->ipv4.sysctl_ip_default_ttl in inet_init_net, called during ipv4's initialization. Without this commit, a kernel built without sysctl support will send all IP packets with zero TTL (unless a TTL is explicitly set, e.g. with setsockopt). Given a similar issue might appear on the other knobs that were namespaceified, this commit also moves them. Fixes: fa50d974d104 ("ipv4: Namespaceify ip_default_ttl sysctl knob") Signed-off-by: Ezequiel Garcia Signed-off-by: David S. Miller diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 377424e..d39e9e4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1681,6 +1681,14 @@ static __net_init int inet_init_net(struct net *net) */ net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1); net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0); + + /* Default values for sysctl-controlled parameters. + * We set them here, in case sysctl is not compiled. + */ + net->ipv4.sysctl_ip_default_ttl = IPDEFTTL; + net->ipv4.sysctl_ip_dynaddr = 0; + net->ipv4.sysctl_ip_early_demux = 1; + return 0; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index bb04195..1cb67de 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -999,10 +999,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (!net->ipv4.sysctl_local_reserved_ports) goto err_ports; - net->ipv4.sysctl_ip_default_ttl = IPDEFTTL; - net->ipv4.sysctl_ip_dynaddr = 0; - net->ipv4.sysctl_ip_early_demux = 1; - return 0; err_ports: -- cgit v0.10.2 From 156f4fbcc2bd1ce38da483f932d3963ba9a79ad0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 22 May 2016 11:06:09 +0200 Subject: MAINTAINERS: Add file patterns for net device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index 5f83015..8d39715 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7832,6 +7832,7 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git S: Odd Fixes +F: Documentation/devicetree/bindings/net/ F: drivers/net/ F: include/linux/if_* F: include/linux/netdevice.h -- cgit v0.10.2 From 612bacad78ba6d0a91166fc4487af114bac172a8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 22 May 2016 23:16:18 +0200 Subject: bpf, inode: disallow userns mounts Follow-up to commit e27f4a942a0e ("bpf: Use mount_nodev not mount_ns to mount the bpf filesystem"), which removes the FS_USERNS_MOUNT flag. The original idea was to have a per mountns instance instead of a single global fs instance, but that didn't work out and we had to switch to the mount_nodev() model. The intent of that middle ground was to prevent users who don't play nice from creating endless instances of bpf fs which are difficult to control and discover from an admin point of view, but at the same time it would have allowed us to be more flexible with regard to namespaces. 
Therefore, since we now did the switch to mount_nodev() as a fix where individual instances are created, we also need to remove the userns mount flag along with it to avoid running into the mentioned situation. I don't expect any breakage at this early point in time with removing the flag and we can revisit this later should the requirement for this come up with future users. This and commit e27f4a942a0e have been split to facilitate tracking should any of them run into the unlikely case of causing a regression. Fixes: b2197755b263 ("bpf: add support for persistent maps/progs") Signed-off-by: Daniel Borkmann Acked-by: Hannes Frederic Sowa Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 04be702..318858e 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -365,7 +365,6 @@ static struct file_system_type bpf_fs_type = { .name = "bpf", .mount = bpf_mount, .kill_sb = kill_litter_super, - .fs_flags = FS_USERNS_MOUNT, }; MODULE_ALIAS_FS("bpf"); -- cgit v0.10.2 From 54b9430f04b0be3beb7f2711152c61e7d39c58e5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 23 May 2016 13:19:35 +0300 Subject: qed: signedness bug in qed_dcbx_process_tlv() "priority" needs to be signed for the error handling to work. Fixes: 39651abd2814 ('qed: add support for dcbx.') Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index cbf58e1..a06d19a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -192,9 +192,10 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, struct dcbx_app_priority_entry *p_tbl, u32 pri_tc_tbl, int count, bool dcbx_enabled) { - u8 tc, priority, priority_map; + u8 tc, priority_map; enum dcbx_protocol_type type; u16 protocol_id; + int priority; bool enable; int i; -- cgit v0.10.2 From 1b227e536662eae4b7beee3037edbecdc577495f Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Sat, 21 May 2016 18:17:34 +0800 Subject: ip6_gre: Fix MTU setting for ip6gretap When creating an ip6gretap interface with an unreachable route, the MTU is about 14 bytes larger than needed. If the remote address is reachable: ping6 2001:0:130::1 -c 2 PING 2001:0:130::1(2001:0:130::1) 56 data bytes 64 bytes from 2001:0:130::1: icmp_seq=1 ttl=64 time=1.46 ms 64 bytes from 2001:0:130::1: icmp_seq=2 ttl=64 time=81.1 ms Signed-off-by: David S. Miller diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index af503f5..a6fe339 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1027,6 +1027,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; + if (dev->type == ARPHRD_ETHER) + dev->mtu -= ETH_HLEN; if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; -- cgit v0.10.2 From 252f3f5a1189a7f6c309d8e4ff1c4c1888a27f13 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Sat, 21 May 2016 18:17:35 +0800 Subject: ip6_gre: Set flowi6_proto as IPPROTO_GRE in xmit path. In the gre6 xmit path, we are sending a GRE packet, so set the fl6 proto to IPPROTO_GRE properly. Signed-off-by: Haishuang Yan Signed-off-by: David S. 
Miller diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index a6fe339..f4ac284 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -712,6 +712,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) fl6->daddr = p->raddr; fl6->flowi6_oif = p->link; fl6->flowlabel = 0; + fl6->flowi6_proto = IPPROTO_GRE; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; -- cgit v0.10.2 From 3275c0c6c522ab04afa14f80efdac6213c3883d6 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 23 May 2016 11:58:28 +1000 Subject: net/qlge: Avoids recursive EEH error One timer, whose handler keeps reading on MMIO register for EEH core to detect error in time, is started when the PCI device driver is loaded. MMIO register can't be accessed during PE reset in EEH recovery. Otherwise, the unexpected recursive error is triggered. The timer isn't closed that time if the interface isn't brought up. So the unexpected recursive error is seen during EEH recovery when the interface is down. This avoids the unexpected recursive EEH error by closing the timer in qlge_io_error_detected() before EEH PE reset unconditionally. The timer is started unconditionally after EEH PE reset in qlge_io_resume(). Also, the timer should be closed unconditionally when the device is removed from the system permanently in qlge_io_error_detected(). Reported-by: Shriya R. Kulkarni Signed-off-by: Gavin Shan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 83d7210..fd5d1c9 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -4846,7 +4846,6 @@ static void ql_eeh_close(struct net_device *ndev) } /* Disabling the timer */ - del_timer_sync(&qdev->timer); ql_cancel_all_work_sync(qdev); for (i = 0; i < qdev->rss_ring_count; i++) @@ -4873,6 +4872,7 @@ static pci_ers_result_t qlge_io_error_detected(struct pci_dev *pdev, return PCI_ERS_RESULT_CAN_RECOVER; case pci_channel_io_frozen: netif_device_detach(ndev); + del_timer_sync(&qdev->timer); if (netif_running(ndev)) ql_eeh_close(ndev); pci_disable_device(pdev); @@ -4880,6 +4880,7 @@ static pci_ers_result_t qlge_io_error_detected(struct pci_dev *pdev, case pci_channel_io_perm_failure: dev_err(&pdev->dev, "%s: pci_channel_io_perm_failure.\n", __func__); + del_timer_sync(&qdev->timer); ql_eeh_close(ndev); set_bit(QL_EEH_FATAL, &qdev->flags); return PCI_ERS_RESULT_DISCONNECT; -- cgit v0.10.2 From a9efad8b24bd22616f6c749a6c029957dc76542b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 May 2016 14:24:56 -0700 Subject: net_sched: avoid too many hrtimer_start() calls I found a serious performance bug in packet schedulers using hrtimers. sch_htb and sch_fq are definitely impacted by this problem. We constantly rearm high resolution timers if some packets are throttled in one (or more) class, and other packets are flying through qdisc on another (non throttled) class. hrtimer_start() does not have the mod_timer() trick of doing nothing if expires value does not change : if (timer_pending(timer) && timer->expires == expires) return 1; This issue is particularly visible when multiple cpus can queue/dequeue packets on the same qdisc, as hrtimer code has to lock a remote base. I used following fix : 1) Change htb to use qdisc_watchdog_schedule_ns() instead of open-coding it. 2) Cache watchdog prior expiration. hrtimer might provide this, but I prefer to not rely on some hrtimer internal. 
Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 401038d..fea53f4 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -61,6 +61,7 @@ psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound) } struct qdisc_watchdog { + u64 last_expires; struct hrtimer timer; struct Qdisc *qdisc; }; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 64f71a2..ddf047d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -607,6 +607,10 @@ void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires, bool thr if (throttle) qdisc_throttled(wd->qdisc); + if (wd->last_expires == expires) + return; + + wd->last_expires = expires; hrtimer_start(&wd->timer, ns_to_ktime(expires), HRTIMER_MODE_ABS_PINNED); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index f6bf581..d4b4218 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -928,17 +928,10 @@ ok: } } qdisc_qstats_overlimit(sch); - if (likely(next_event > q->now)) { - if (!test_bit(__QDISC_STATE_DEACTIVATED, - &qdisc_root_sleeping(q->watchdog.qdisc)->state)) { - ktime_t time = ns_to_ktime(next_event); - qdisc_throttled(q->watchdog.qdisc); - hrtimer_start(&q->watchdog.timer, time, - HRTIMER_MODE_ABS_PINNED); - } - } else { + if (likely(next_event > q->now)) + qdisc_watchdog_schedule_ns(&q->watchdog, next_event, true); + else schedule_work(&q->work); - } fin: return skb; } -- cgit v0.10.2 From 3d3ed18151172c845a11b7c184f2120220ae19fc Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 23 May 2016 21:07:20 -0400 Subject: net sched actions: policer missing timestamp processing Policer was not dumping or updating timestamps Signed-off-by: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S. Miller diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index eba5914..f4297c8 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -145,6 +145,8 @@ enum { TCA_POLICE_PEAKRATE, TCA_POLICE_AVRATE, TCA_POLICE_RESULT, + TCA_POLICE_TM, + TCA_POLICE_PAD, __TCA_POLICE_MAX #define TCA_POLICE_RESULT TCA_POLICE_RESULT }; @@ -173,7 +175,7 @@ enum { TCA_U32_DIVISOR, TCA_U32_SEL, TCA_U32_POLICE, - TCA_U32_ACT, + TCA_U32_ACT, TCA_U32_INDEV, TCA_U32_PCNT, TCA_U32_MARK, diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 330f14e..b884dae 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -239,6 +239,8 @@ override: police->tcfp_t_c = ktime_get_ns(); police->tcf_index = parm->index ? 
parm->index : tcf_hash_new_index(tn); + police->tcf_tm.install = jiffies; + police->tcf_tm.lastuse = jiffies; h = tcf_hash(police->tcf_index, POL_TAB_MASK); spin_lock_bh(&hinfo->lock); hlist_add_head(&police->tcf_head, &hinfo->htab[h]); @@ -268,6 +270,7 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, spin_lock(&police->tcf_lock); bstats_update(&police->tcf_bstats, skb); + tcf_lastuse_update(&police->tcf_tm); if (police->tcfp_ewma_rate && police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { @@ -327,6 +330,7 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) .refcnt = police->tcf_refcnt - ref, .bindcnt = police->tcf_bindcnt - bind, }; + struct tcf_t t; if (police->rate_present) psched_ratecfg_getrate(&opt.rate, &police->rate); @@ -340,6 +344,13 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) if (police->tcfp_ewma_rate && nla_put_u32(skb, TCA_POLICE_AVRATE, police->tcfp_ewma_rate)) goto nla_put_failure; + + t.install = jiffies_to_clock_t(jiffies - police->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - police->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(police->tcf_tm.expires); + if (nla_put_64bit(skb, TCA_POLICE_TM, sizeof(t), &t, TCA_POLICE_PAD)) + goto nla_put_failure; + return skb->len; nla_put_failure: -- cgit v0.10.2 From 3851112e4737cd52aaeda0ce8d084be9ee128106 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Ravipati Date: Sun, 22 May 2016 23:59:00 -0700 Subject: ethtool: add support for 25G/50G/100G speed modes This patch enhances ethtool link mode bitmap to include 25G/50G/100G speed along with interface modes Signed-off-by: Vidya Sagar Ravipati Signed-off-by: David S. Miller diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 9222db8..5f030b4 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1353,6 +1353,15 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT = 28, ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT = 29, ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT = 30, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT = 31, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT = 32, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT = 33, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT = 34, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT = 35, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT = 36, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT = 37, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT = 38, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT = 39, /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* @@ -1361,7 +1370,7 @@ enum ethtool_link_mode_bit_indices { */ __ETHTOOL_LINK_MODE_LAST - = ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT, + = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, }; #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ -- cgit v0.10.2 From ec7c7f5cafdaeea509ad4e7e6f71c2312062c4c6 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 24 May 2016 05:25:23 -0400 Subject: qed: Reset the enable flag for eth protocol. This patch fixes the coding error in determining the enable flag for the application/protocol. The enable flag should be set for all protocols but the eth. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index a06d19a..21ec1c2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -222,7 +222,7 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, * indication, but we only got here if there was an * app tlv for the protocol, so dcbx must be enabled. */ - enable = !!(type == DCBX_PROTOCOL_ETH); + enable = !(type == DCBX_PROTOCOL_ETH); qed_dcbx_update_app_info(p_data, p_hwfn, enable, true, priority, tc, type); -- cgit v0.10.2 From 297f7d2cce6a156c174334ee452f2f7a7ba405ca Mon Sep 17 00:00:00 2001 From: Baozeng Ding Date: Tue, 24 May 2016 22:33:24 +0800 Subject: tipc: fix potential null pointer dereferences in some compat functions Before calling the nla_parse_nested function, make sure the pointer to the attribute is not null. This patch fixes several potential null pointer dereference vulnerabilities in the tipc netlink functions. Signed-off-by: Baozeng Ding Signed-off-by: David S. Miller diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 4dfc5c1..f795b1d 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -346,9 +346,15 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_BEARER]) + return -EINVAL; - nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER], - NULL); + err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], NULL); + if (err) + return err; return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME, nla_data(bearer[TIPC_NLA_BEARER_NAME]), @@ -460,14 +466,31 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; + int err; - nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + if (!attrs[TIPC_NLA_LINK]) + return -EINVAL; - nla_parse_nested(prop, TIPC_NLA_PROP_MAX, link[TIPC_NLA_LINK_PROP], - NULL); + err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], + NULL); + if (err) + return err; + + if (!link[TIPC_NLA_LINK_PROP]) + return -EINVAL; - nla_parse_nested(stats, TIPC_NLA_STATS_MAX, link[TIPC_NLA_LINK_STATS], - NULL); + err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX, + link[TIPC_NLA_LINK_PROP], NULL); + if (err) + return err; + + if (!link[TIPC_NLA_LINK_STATS]) + return -EINVAL; + + err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX, + link[TIPC_NLA_LINK_STATS], NULL); + if (err) + return err; name = (char *)TLV_DATA(msg->req); if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) @@ -569,8 +592,15 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, { struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; struct tipc_link_info link_info; + int err; - nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + if (!attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], + NULL); + if (err) + return err; link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); @@ -758,12 +788,23 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, u32 node, depth, type, lowbound, upbound; static const char * const scope_str[] = {"", " zone", " cluster", " node"}; + int err; - nla_parse_nested(nt, 
TIPC_NLA_NAME_TABLE_MAX, - attrs[TIPC_NLA_NAME_TABLE], NULL); + if (!attrs[TIPC_NLA_NAME_TABLE]) + return -EINVAL; - nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, nt[TIPC_NLA_NAME_TABLE_PUBL], - NULL); + err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, + attrs[TIPC_NLA_NAME_TABLE], NULL); + if (err) + return err; + + if (!nt[TIPC_NLA_NAME_TABLE_PUBL]) + return -EINVAL; + + err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, + nt[TIPC_NLA_NAME_TABLE_PUBL], NULL); + if (err) + return err; ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); @@ -815,8 +856,15 @@ static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, { u32 type, lower, upper; struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1]; + int err; - nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], NULL); + if (!attrs[TIPC_NLA_PUBL]) + return -EINVAL; + + err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], + NULL); + if (err) + return err; type = nla_get_u32(publ[TIPC_NLA_PUBL_TYPE]); lower = nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]); @@ -876,7 +924,13 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, u32 sock_ref; struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; - nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], NULL); + if (!attrs[TIPC_NLA_SOCK]) + return -EINVAL; + + err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], + NULL); + if (err) + return err; sock_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); tipc_tlv_sprintf(msg->rep, "%u:", sock_ref); @@ -917,9 +971,15 @@ static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { struct nlattr *media[TIPC_NLA_MEDIA_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_MEDIA]) + return -EINVAL; - nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, attrs[TIPC_NLA_MEDIA], - NULL); + err = nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, attrs[TIPC_NLA_MEDIA], + NULL); + if (err) + return err; return tipc_add_tlv(msg->rep, TIPC_TLV_MEDIA_NAME, nla_data(media[TIPC_NLA_MEDIA_NAME]), @@ -931,8 +991,15 @@ static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, { struct tipc_node_info node_info; struct nlattr *node[TIPC_NLA_NODE_MAX + 1]; + int err; - nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE], NULL); + if (!attrs[TIPC_NLA_NODE]) + return -EINVAL; + + err = nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE], + NULL); + if (err) + return err; node_info.addr = htonl(nla_get_u32(node[TIPC_NLA_NODE_ADDR])); node_info.up = htonl(nla_get_flag(node[TIPC_NLA_NODE_UP])); @@ -971,8 +1038,16 @@ static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg, { __be32 id; struct nlattr *net[TIPC_NLA_NET_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_NET]) + return -EINVAL; + + err = nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET], + NULL); + if (err) + return err; - nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET], NULL); id = htonl(nla_get_u32(net[TIPC_NLA_NET_ID])); return tipc_add_tlv(msg->rep, TIPC_TLV_UNSIGNED, &id, sizeof(id)); -- cgit v0.10.2 From 91c45e38b9478ff507e05f10151d64cd0d1aad7b Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 24 May 2016 18:03:25 +0200 Subject: net: mvneta: Fix lacking spinlock initialization The spinlock used by the hwbm functions must be initialized by the network driver. This commit fixes this lack and the following erros when lockdep is enabled: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. 
[] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xb4/0xe0) [] (dump_stack) from [] (__lock_acquire+0x1f58/0x2060) [] (__lock_acquire) from [] (lock_acquire+0xa4/0xd0) [] (lock_acquire) from [] (_raw_spin_lock_irqsave+0x54/0x68) [] (_raw_spin_lock_irqsave) from [] (hwbm_pool_add+0x1c/0xdc) [] (hwbm_pool_add) from [] (mvneta_bm_pool_use+0x338/0x490) [] (mvneta_bm_pool_use) from [] (mvneta_probe+0x654/0x1284) [] (mvneta_probe) from [] (platform_drv_probe+0x4c/0xb0) [] (platform_drv_probe) from [] (driver_probe_device+0x214/0x2c0) [] (driver_probe_device) from [] (__driver_attach+0xc0/0xc4) [] (__driver_attach) from [] (bus_for_each_dev+0x68/0x9c) [] (bus_for_each_dev) from [] (bus_add_driver+0x1a0/0x218) [] (bus_add_driver) from [] (driver_register+0x78/0xf8) [] (driver_register) from [] (do_one_initcall+0x90/0x1dc) [] (do_one_initcall) from [] (kernel_init_freeable+0x15c/0x1fc) [] (kernel_init_freeable) from [] (kernel_init+0x8/0x114) [] (kernel_init) from [] (ret_from_fork+0x14/0x24) Fixes: baa11ebc0c76 ("net: mvneta: Use the new hwbm framework") Reported-by: Russell King Cc: Signed-off-by: Gregory CLEMENT Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/marvell/mvneta_bm.c b/drivers/net/ethernet/marvell/mvneta_bm.c index 01fccec..466939f 100644 --- a/drivers/net/ethernet/marvell/mvneta_bm.c +++ b/drivers/net/ethernet/marvell/mvneta_bm.c @@ -189,6 +189,7 @@ struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id, SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); hwbm_pool->construct = mvneta_bm_construct; hwbm_pool->priv = new_pool; + spin_lock_init(&hwbm_pool->lock); /* Create new pool */ err = mvneta_bm_pool_create(priv, new_pool); -- cgit v0.10.2 From b388fc7405e901c7d6f7817d05193c054e761815 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 24 May 2016 18:03:26 +0200 Subject: net: hwbm: Fix unbalanced spinlock in error case When hwbm_pool_add exited in error the spinlock was not released. This patch fixes this issue. Fixes: 8cb2d8bf57e6 ("net: add a hardware buffer management helper API") Reported-by: Jean-Jacques Hiblot Cc: Signed-off-by: Gregory CLEMENT Signed-off-by: David S. Miller diff --git a/net/core/hwbm.c b/net/core/hwbm.c index 941c284..2cab489 100644 --- a/net/core/hwbm.c +++ b/net/core/hwbm.c @@ -55,18 +55,21 @@ int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp) spin_lock_irqsave(&bm_pool->lock, flags); if (bm_pool->buf_num == bm_pool->size) { pr_warn("pool already filled\n"); + spin_unlock_irqrestore(&bm_pool->lock, flags); return bm_pool->buf_num; } if (buf_num + bm_pool->buf_num > bm_pool->size) { pr_warn("cannot allocate %d buffers for pool\n", buf_num); + spin_unlock_irqrestore(&bm_pool->lock, flags); return 0; } if ((buf_num + bm_pool->buf_num) < bm_pool->buf_num) { pr_warn("Adding %d buffers to the %d current buffers will overflow\n", buf_num, bm_pool->buf_num); + spin_unlock_irqrestore(&bm_pool->lock, flags); return 0; } -- cgit v0.10.2 From c0795bf64cba4d1b796fdc5b74b33772841ed1bb Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 24 May 2016 18:53:36 +0100 Subject: sfc: on MC reset, clear PIO buffer linkage in TXQs Otherwise, if we fail to allocate new PIO buffers, our TXQs will try to use the old ones, which aren't there any more. Fixes: 183233bec810 "sfc: Allocate and link PIO buffers; map them with write-combining" Signed-off-by: Edward Cree Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 1681084..1f30912 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -619,6 +619,17 @@ fail: return rc; } +static void efx_ef10_forget_old_piobufs(struct efx_nic *efx) +{ + struct efx_channel *channel; + struct efx_tx_queue *tx_queue; + + /* All our existing PIO buffers went away */ + efx_for_each_channel(channel, efx) + efx_for_each_channel_tx_queue(tx_queue, channel) + tx_queue->piobuf = NULL; +} + #else /* !EFX_USE_PIO */ static int efx_ef10_alloc_piobufs(struct efx_nic *efx, unsigned int n) @@ -635,6 +646,10 @@ static void efx_ef10_free_piobufs(struct efx_nic *efx) { } +static void efx_ef10_forget_old_piobufs(struct efx_nic *efx) +{ +} + #endif /* EFX_USE_PIO */ static void efx_ef10_remove(struct efx_nic *efx) @@ -1018,6 +1033,7 @@ static void efx_ef10_reset_mc_allocations(struct efx_nic *efx) nic_data->must_realloc_vis = true; nic_data->must_restore_filters = true; nic_data->must_restore_piobufs = true; + efx_ef10_forget_old_piobufs(efx); nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID; /* Driver-created vswitches and vports must be re-created */ -- cgit v0.10.2 From 75c0bbd0e23a295faf80aa68bf5ab2b5fac709b2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 24 May 2016 21:26:39 -0700 Subject: Documentation: networking: dsa: Remove poll_link description This function has been removed in 4baee937b8d5 ("net: dsa: remove DSA link polling") in favor of using the PHYLIB polling mechanism. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index 631b0f7..8303eb8 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -416,11 +416,6 @@ PHY devices and link management to the switch port MDIO registers. If unavailable return a negative error code. -- poll_link: Function invoked by DSA to query the link state of the switch - builtin Ethernet PHYs, per port. This function is responsible for calling - netif_carrier_{on,off} when appropriate, and can be used to poll all ports in a - single call. Executes from workqueue context. - - adjust_link: Function invoked by the PHY library when a slave network device is attached to a PHY device. This function is responsible for appropriately configuring the switch port link parameters: speed, duplex, pause based on -- cgit v0.10.2 From 7013d8e1d0a07b74dc6f81b470654290b769e9bb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 24 May 2016 21:26:40 -0700 Subject: Documentation: networking: dsa: Remove priv_size description We no longer have a priv_size structure member since 5feebd0a8a79 ("net: dsa: Remove allocation of driver private memory") Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index 8303eb8..411b57f 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -369,8 +369,6 @@ does not allocate any driver private context space. 
Switch configuration -------------------- -- priv_size: additional size needed by the switch driver for its private context - - tag_protocol: this is to indicate what kind of tagging protocol is supported, should be a valid value from the dsa_tag_protocol enum -- cgit v0.10.2 From f05e2db1996d7a640c76481ba5ea3d7a295a8f48 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 24 May 2016 21:26:41 -0700 Subject: Documentation: networking: dsa: Describe port_vlan_filtering Described what the port_vlan_filtering function is supposed to accomplish. Fixes: fb2dabad69f0 ("net: dsa: support VLAN filtering switchdev attr") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index 411b57f..9d05ed7 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -535,6 +535,16 @@ Bridge layer Bridge VLAN filtering --------------------- +- port_vlan_filtering: bridge layer function invoked when the bridge gets + configured for turning on or off VLAN filtering. If nothing specific needs to + be done at the hardware level, this callback does not need to be implemented. + When VLAN filtering is turned on, the hardware must be programmed with + rejecting 802.1Q frames which have VLAN IDs outside of the programmed allowed + VLAN ID map/rules. If there is no PVID programmed into the switch port, + untagged frames must be rejected as well. When turned off the switch must + accept any 802.1Q frames irrespective of their VLAN ID, and untagged frames are + allowed. + - port_vlan_prepare: bridge layer function invoked when the bridge prepares the configuration of a VLAN on the given port. If the operation is not supported by the hardware, this function should return -EOPNOTSUPP to inform the bridge -- cgit v0.10.2 From f6988cb63a4e698d8a62a1d085d263d1fcc351ea Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 25 May 2016 21:21:52 +0200 Subject: team: don't call netdev_change_features under team->lock The team_device_event() notifier calls team_compute_features() to fix vlan_features under team->lock to protect team->port_list. The problem is that subsequent __team_compute_features() calls netdev_change_features() to propagate vlan_features to upper vlan devices while team->lock is still taken. This can lead to deadlock when NETIF_F_LRO is modified on lower devices or team device itself. Example: The team0 as active backup with eth0 and eth1 NICs. Both eth0 & eth1 are LRO capable and LRO is enabled. Thus LRO is also enabled on team0. The command 'ethtool -K team0 lro off' now hangs due to this deadlock: dev_ethtool() -> ethtool_set_features() -> __netdev_update_features(team) -> netdev_sync_lower_features() -> netdev_update_features(lower_1) -> __netdev_update_features(lower_1) -> netdev_features_change(lower_1) -> call_netdevice_notifiers(...) -> team_device_event(lower_1) -> team_compute_features(team) [TAKES team->lock] -> netdev_change_features(team) -> __netdev_update_features(team) -> netdev_sync_lower_features() -> netdev_update_features(lower_2) -> __netdev_update_features(lower_2) -> netdev_features_change(lower_2) -> call_netdevice_notifiers(...) -> team_device_event(lower_2) -> team_compute_features(team) [DEADLOCK] The bug is present in team from the beginning but it appeared after the commit fd867d5 (net/core: generic support for disabling netdev features down stack) that adds synchronization of features with lower devices. 
Fixes: fd867d5 (net/core: generic support for disabling netdev features down stack) Cc: Jiri Pirko Signed-off-by: Ivan Vecera Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index a0f64cb..2ace126 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -990,7 +990,7 @@ static void team_port_disable(struct team *team, #define TEAM_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ NETIF_F_RXCSUM | NETIF_F_ALL_TSO) -static void __team_compute_features(struct team *team) +static void ___team_compute_features(struct team *team) { struct team_port *port; u32 vlan_features = TEAM_VLAN_FEATURES & NETIF_F_ALL_FOR_ALL; @@ -1021,15 +1021,20 @@ static void __team_compute_features(struct team *team) team->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; if (dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM)) team->dev->priv_flags |= IFF_XMIT_DST_RELEASE; +} +static void __team_compute_features(struct team *team) +{ + ___team_compute_features(team); netdev_change_features(team->dev); } static void team_compute_features(struct team *team) { mutex_lock(&team->lock); - __team_compute_features(team); + ___team_compute_features(team); mutex_unlock(&team->lock); + netdev_change_features(team->dev); } static int team_port_enter(struct team *team, struct team_port *port) -- cgit v0.10.2 From 26c5f03b2ae8018418ceb25b2e6a48560e8c2f5b Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 25 May 2016 14:49:54 +0800 Subject: net: alx: use custom skb allocator This patch follows Eric Dumazet's commit 7b70176421 for the Atheros atl1c driver to fix exactly the same bug in the alx driver: the network link will be lost within 1-5 minutes after the device is up. My laptop Lenovo Y580 with an Atheros AR8161 ethernet device hit the same problem with kernel 4.4, and it is cured by Jarod Wilson's commit c406700c for the alx driver, which got merged in 4.5. But there are still some alx devices that can't function well even with Jarod's patch, while this patch makes them work fine. More details at https://bugzilla.kernel.org/show_bug.cgi?id=70761 Debugging shows the issue is very likely related to the RX DMA address, specifically 0x...f80: if the RX buffer gets an address of the form 0x...f80 several times, there will be an RX overflow error and the device will stop working. For kernel 4.5.0 with Jarod's patch, which works fine with my AR8161/Lenovo Y580, if I change __netdev_alloc_skb --> __alloc_page_frag() so that the allocated buffer gets an address of the form 0x...f80, the same error happens. If I make it 0x...f40 or 0x...fc0, everything is still fine. So I tend to believe that the 0x...f80 address causes the silicon to behave abnormally. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=70761 Cc: Eric Dumazet Cc: Johannes Berg Cc: Jarod Wilson Signed-off-by: Feng Tang Tested-by: Ole Lukoie Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h index 8fc93c5..d02c424 100644 --- a/drivers/net/ethernet/atheros/alx/alx.h +++ b/drivers/net/ethernet/atheros/alx/alx.h @@ -96,6 +96,10 @@ struct alx_priv { unsigned int rx_ringsz; unsigned int rxbuf_size; + struct page *rx_page; + unsigned int rx_page_offset; + unsigned int rx_frag_size; + struct napi_struct napi; struct alx_tx_queue txq; struct alx_rx_queue rxq; diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 9fe8b5e..c98acdc 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -70,6 +70,35 @@ static void alx_free_txbuf(struct alx_priv *alx, int entry) } } +static struct sk_buff *alx_alloc_skb(struct alx_priv *alx, gfp_t gfp) +{ + struct sk_buff *skb; + struct page *page; + + if (alx->rx_frag_size > PAGE_SIZE) + return __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp); + + page = alx->rx_page; + if (!page) { + alx->rx_page = page = alloc_page(gfp); + if (unlikely(!page)) + return NULL; + alx->rx_page_offset = 0; + } + + skb = build_skb(page_address(page) + alx->rx_page_offset, + alx->rx_frag_size); + if (likely(skb)) { + alx->rx_page_offset += alx->rx_frag_size; + if (alx->rx_page_offset >= PAGE_SIZE) + alx->rx_page = NULL; + else + get_page(page); + } + return skb; +} + + static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp) { struct alx_rx_queue *rxq = &alx->rxq; @@ -86,7 +115,7 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp) while (!cur_buf->skb && next != rxq->read_idx) { struct alx_rfd *rfd = &rxq->rfd[cur]; - skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp); + skb = alx_alloc_skb(alx, gfp); if (!skb) break; dma = dma_map_single(&alx->hw.pdev->dev, @@ -124,6 +153,7 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp) alx_write_mem16(&alx->hw, ALX_RFD_PIDX, cur); } + return count; } @@ -592,6 +622,11 @@ static void alx_free_rings(struct alx_priv *alx) kfree(alx->txq.bufs); kfree(alx->rxq.bufs); + if (alx->rx_page) { + put_page(alx->rx_page); + alx->rx_page = NULL; + } + dma_free_coherent(&alx->hw.pdev->dev, alx->descmem.size, alx->descmem.virt, @@ -646,6 +681,7 @@ static int alx_request_irq(struct alx_priv *alx) alx->dev->name, alx); if (!err) goto out; + /* fall back to legacy interrupt */ pci_disable_msi(alx->hw.pdev); } @@ -689,6 +725,7 @@ static int alx_init_sw(struct alx_priv *alx) struct pci_dev *pdev = alx->hw.pdev; struct alx_hw *hw = &alx->hw; int err; + unsigned int head_size; err = alx_identify_hw(alx); if (err) { @@ -704,7 +741,12 @@ static int alx_init_sw(struct alx_priv *alx) hw->smb_timer = 400; hw->mtu = alx->dev->mtu; + alx->rxbuf_size = ALX_MAX_FRAME_LEN(hw->mtu); + head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + alx->rx_frag_size = roundup_pow_of_two(head_size); + alx->tx_ringsz = 256; alx->rx_ringsz = 512; hw->imt = 200; @@ -806,6 +848,7 @@ static int alx_change_mtu(struct net_device *netdev, int mtu) { struct alx_priv *alx = netdev_priv(netdev); int max_frame = ALX_MAX_FRAME_LEN(mtu); + unsigned int head_size; if ((max_frame < ALX_MIN_FRAME_SIZE) || (max_frame > ALX_MAX_FRAME_SIZE)) @@ -817,6 +860,9 @@ static int alx_change_mtu(struct net_device *netdev, int mtu) netdev->mtu = mtu; alx->hw.mtu = mtu; alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE); + head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) + + SKB_DATA_ALIGN(sizeof(struct 
skb_shared_info)); + alx->rx_frag_size = roundup_pow_of_two(head_size); netdev_update_features(netdev); if (netif_running(netdev)) alx_reinit(alx); -- cgit v0.10.2 From 643d60bf575daaba93c1ac0d0e1c4b1d4ded1f75 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 26 May 2016 00:40:23 +0200 Subject: net: stmmac: Fix incorrect memcpy source memory The memcpy() currently copies mdio_bus_data into new_bus->irq, which makes no sense, since the mdio_bus_data structure contains more than just irqs. The code was likely supposed to copy mdio_bus_data->irqs into the new_bus->irq instead, so fix this. Fixes: e7f4dc3536a4 ("mdio: Move allocation of interrupts into core") Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 3f83c36..ec29585 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -297,7 +297,7 @@ int stmmac_mdio_register(struct net_device *ndev) return -ENOMEM; if (mdio_bus_data->irqs) - memcpy(new_bus->irq, mdio_bus_data, sizeof(new_bus->irq)); + memcpy(new_bus->irq, mdio_bus_data->irqs, sizeof(new_bus->irq)); #ifdef CONFIG_OF if (priv->device->of_node) -- cgit v0.10.2 From 3424d9be8f09649e6290d066c5c3cccff1c0ce77 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 26 May 2016 00:40:05 +0200 Subject: net: arc: trivial: Replace comma with a semicolon Fix a typo in the driver, replace comma with a semicolon at the end of statement. While using comma is a legal C here and probably does not even generate compiler warning, it was unlikely the intention. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Caesar Wang Cc: Heiko Stuebner Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/arc/emac_mdio.c b/drivers/net/ethernet/arc/emac_mdio.c index 16419f5..058460b 100644 --- a/drivers/net/ethernet/arc/emac_mdio.c +++ b/drivers/net/ethernet/arc/emac_mdio.c @@ -141,7 +141,7 @@ int arc_mdio_probe(struct arc_emac_priv *priv) priv->bus = bus; bus->priv = priv; bus->parent = priv->dev; - bus->name = "Synopsys MII Bus", + bus->name = "Synopsys MII Bus"; bus->read = &arc_mdio_read; bus->write = &arc_mdio_write; bus->reset = &arc_mdio_reset; -- cgit v0.10.2 From bed187b540167eb10320f6a2177604421650772e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 26 May 2016 03:09:23 +0800 Subject: sctp: fix double EPs display in sctp_diag We have this situation: that EP hash table, contains only the EPs that are listening, while the transports one, has the opposite. We have to traverse both to dump all. But when we traverse the transports one we will also get EPs that are in the EP hash if they are listening. In this case, the EP is dumped twice. We will fix it by checking if the endpoint that is in the endpoint hash table contains any ep->asoc in there, as it means we will also find it via transport hash, and thus we can/should skip it, depending on the filters used, like 'ss -l'. Still, we should NOT skip it if the user is listing only listening endpoints, because then we are not traversing the transport hash. so we have to check idiag_states there also. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 8e3e769..1ce724b 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -356,6 +356,9 @@ static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) if (cb->args[4] < cb->args[1]) goto next; + if ((r->idiag_states & ~TCPF_LISTEN) && !list_empty(&ep->asocs)) + goto next; + if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) goto next; -- cgit v0.10.2 From 63a664b7e92b14aaa1e1c3e9ae362aa70cf4cefb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 May 2016 09:50:36 -0700 Subject: net/mlx4_en: fix tx_dropped bug 1) mlx4_en_xmit() can increment priv->stats.tx_dropped, but this variable is overwritten in mlx4_en_DUMP_ETH_STATS(). 2) This increment was not SMP safe, as a port might have many TX queues. Add a per TX ring tx_dropped to fix these issues. This is u32 as mlx4_en_DUMP_ETH_STATS() will add a 32bit field. So lets avoid bugs with SNMP agents having to cope with partial overwraps. (One of these agents being bond_fold_stats()) Signed-off-by: Eric Dumazet Reported-by: Willem de Bruijn Cc: Eugenia Emantayev Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 92e0624..cfd5020 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1892,6 +1892,7 @@ static void mlx4_en_clear_stats(struct net_device *dev) priv->tx_ring[i]->bytes = 0; priv->tx_ring[i]->packets = 0; priv->tx_ring[i]->tx_csum = 0; + priv->tx_ring[i]->tx_dropped = 0; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_ring[i]->bytes = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 20b6c2e..3df8690 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -188,6 +188,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) } stats->tx_packets = 0; stats->tx_bytes = 0; + stats->tx_dropped = 0; priv->port_stats.tx_chksum_offload = 0; priv->port_stats.queue_stopped = 0; priv->port_stats.wake_queue = 0; @@ -199,6 +200,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) stats->tx_packets += ring->packets; stats->tx_bytes += ring->bytes; + stats->tx_dropped += ring->tx_dropped; priv->port_stats.tx_chksum_offload += ring->tx_csum; priv->port_stats.queue_stopped += ring->queue_stopped; priv->port_stats.wake_queue += ring->wake_queue; @@ -251,7 +253,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) stats->tx_fifo_errors = 0; stats->tx_heartbeat_errors = 0; stats->tx_window_errors = 0; - stats->tx_dropped = be32_to_cpu(mlx4_en_stats->TDROP); + stats->tx_dropped += be32_to_cpu(mlx4_en_stats->TDROP); /* RX stats */ priv->pkstats.rx_multicast_packets = stats->multicast; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index f6e6157..76aa4d2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -726,12 +726,12 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) bool inline_ok; u32 ring_cons; - if (!priv->port_up) - goto tx_drop; - tx_ind = skb_get_queue_mapping(skb); ring = priv->tx_ring[tx_ind]; + if (!priv->port_up) + goto tx_drop; + /* fetch ring->cons far ahead before needing it to avoid stall */ ring_cons = ACCESS_ONCE(ring->cons); @@ -1030,7 
+1030,7 @@ tx_drop_unmap: tx_drop: dev_kfree_skb_any(skb); - priv->stats.tx_dropped++; + ring->tx_dropped++; return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index cc84e09..9a91240 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -270,6 +270,7 @@ struct mlx4_en_tx_ring { unsigned long tx_csum; unsigned long tso_packets; unsigned long xmit_more; + unsigned int tx_dropped; struct mlx4_bf bf; unsigned long queue_stopped; -- cgit v0.10.2 From 45acbac609e73991dd9c3c9b7ffd03ddb8c939ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 May 2016 09:50:37 -0700 Subject: net/mlx4_en: clear some TX ring stats in mlx4_en_clear_stats() mlx4_en_clear_stats() clears about everything but few TX ring fields are missing : - queue_stopped, wake_queue, tso_packets, xmit_more Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Eugenia Emantayev Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index cfd5020..bd637c4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1893,6 +1893,10 @@ static void mlx4_en_clear_stats(struct net_device *dev) priv->tx_ring[i]->packets = 0; priv->tx_ring[i]->tx_csum = 0; priv->tx_ring[i]->tx_dropped = 0; + priv->tx_ring[i]->queue_stopped = 0; + priv->tx_ring[i]->wake_queue = 0; + priv->tx_ring[i]->tso_packets = 0; + priv->tx_ring[i]->xmit_more = 0; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_ring[i]->bytes = 0; -- cgit v0.10.2 From 9ed17db17fb01a7be5b84558b768c091bdf8bb41 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 May 2016 09:50:38 -0700 Subject: net/mlx4_en: get rid of ret_stats mlx4 uses a private struct net_device_stats in a vain attempt to avoid races. This is buggy because multiple cpus could call mlx4_en_get_stats() at the same time, so ret_stats can not guarantee stable results. To fix this, we need to switch to ndo_get_stats64() as this method provides per-thread storage. This allows to reduce mlx4_en_priv bloat. Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Eugenia Emantayev Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index bd637c4..a4fc6e9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1296,15 +1296,16 @@ static void mlx4_en_tx_timeout(struct net_device *dev) } -static struct net_device_stats *mlx4_en_get_stats(struct net_device *dev) +static struct rtnl_link_stats64 * +mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx4_en_priv *priv = netdev_priv(dev); spin_lock_bh(&priv->stats_lock); - memcpy(&priv->ret_stats, &priv->stats, sizeof(priv->stats)); + netdev_stats_to_stats64(stats, &priv->stats); spin_unlock_bh(&priv->stats_lock); - return &priv->ret_stats; + return stats; } static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) @@ -2487,7 +2488,7 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_stop = mlx4_en_close, .ndo_start_xmit = mlx4_en_xmit, .ndo_select_queue = mlx4_en_select_queue, - .ndo_get_stats = mlx4_en_get_stats, + .ndo_get_stats64 = mlx4_en_get_stats64, .ndo_set_rx_mode = mlx4_en_set_rx_mode, .ndo_set_mac_address = mlx4_en_set_mac, .ndo_validate_addr = eth_validate_addr, @@ -2519,7 +2520,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_stop = mlx4_en_close, .ndo_start_xmit = mlx4_en_xmit, .ndo_select_queue = mlx4_en_select_queue, - .ndo_get_stats = mlx4_en_get_stats, + .ndo_get_stats64 = mlx4_en_get_stats64, .ndo_set_rx_mode = mlx4_en_set_rx_mode, .ndo_set_mac_address = mlx4_en_set_mac, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 9a91240..bfda84df 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -484,7 +484,6 @@ struct mlx4_en_priv { struct net_device *dev; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct net_device_stats stats; - struct net_device_stats ret_stats; struct mlx4_en_port_state port_state; spinlock_t stats_lock; struct ethtool_flow_id ethtool_rules[MAX_NUM_OF_FS_RULES]; -- cgit v0.10.2 From f73a6f439fca0e14f228726e5647d5afe141bc32 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 May 2016 09:50:39 -0700 Subject: net/mlx4_en: get rid of private net_device_stats We simply can use the standard net_device stats. We do not need to clear fields that are already 0. Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Eugenia Emantayev Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index c761194..fc95aff 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -362,7 +362,7 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, for (i = 0; i < NUM_MAIN_STATS; i++, bitmap_iterator_inc(&it)) if (bitmap_iterator_test(&it)) - data[index++] = ((unsigned long *)&priv->stats)[i]; + data[index++] = ((unsigned long *)&dev->stats)[i]; for (i = 0; i < NUM_PORT_STATS; i++, bitmap_iterator_inc(&it)) if (bitmap_iterator_test(&it)) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index a4fc6e9..19ceced 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1302,7 +1302,7 @@ mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) struct mlx4_en_priv *priv = netdev_priv(dev); spin_lock_bh(&priv->stats_lock); - netdev_stats_to_stats64(stats, &priv->stats); + netdev_stats_to_stats64(stats, &dev->stats); spin_unlock_bh(&priv->stats_lock); return stats; @@ -1877,7 +1877,6 @@ static void mlx4_en_clear_stats(struct net_device *dev) if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) en_dbg(HW, priv, "Failed dumping statistics\n"); - memset(&priv->stats, 0, sizeof(priv->stats)); memset(&priv->pstats, 0, sizeof(priv->pstats)); memset(&priv->pkstats, 0, sizeof(priv->pkstats)); memset(&priv->port_stats, 0, sizeof(priv->port_stats)); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 3df8690..5aa8b75 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -152,8 +152,9 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) struct mlx4_counter tmp_counter_stats; struct mlx4_en_stat_out_mbox *mlx4_en_stats; struct mlx4_en_stat_out_flow_control_mbox *flowstats; - struct mlx4_en_priv *priv = netdev_priv(mdev->pndev[port]); - struct net_device_stats *stats = &priv->stats; + struct net_device *dev = mdev->pndev[port]; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; struct mlx4_cmd_mailbox *mailbox; u64 in_mod = reset << 8 | port; int err; @@ -239,20 +240,11 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) stats->multicast = en_stats_adder(&mlx4_en_stats->MCAST_prio_0, &mlx4_en_stats->MCAST_prio_1, NUM_PRIORITIES); - stats->collisions = 0; stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP) + sw_rx_dropped; stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength); - stats->rx_over_errors = 0; stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC); - stats->rx_frame_errors = 0; stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); - stats->rx_missed_errors = 0; - stats->tx_aborted_errors = 0; - stats->tx_carrier_errors = 0; - stats->tx_fifo_errors = 0; - stats->tx_heartbeat_errors = 0; - stats->tx_window_errors = 0; stats->tx_dropped += be32_to_cpu(mlx4_en_stats->TDROP); /* RX stats */ diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index bfda84df..467d47e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -483,7 +483,6 @@ struct mlx4_en_priv { struct mlx4_en_port_profile *prof; struct net_device *dev; unsigned long 
active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; - struct net_device_stats stats; struct mlx4_en_port_state port_state; spinlock_t stats_lock; struct ethtool_flow_id ethtool_rules[MAX_NUM_OF_FS_RULES]; -- cgit v0.10.2 From be7b6d64c0f2f31ea03746edaabb22f57234cb49 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 26 May 2016 11:01:17 +0300 Subject: qede: Fix VF minimum BW setting VF is currently ignoring the minimum provided by the API, mistakenly using the maximum for minimum as well. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 337e839..3bb1428 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1824,7 +1824,7 @@ static int qede_set_vf_rate(struct net_device *dev, int vfidx, { struct qede_dev *edev = netdev_priv(dev); - return edev->ops->iov->set_rate(edev->cdev, vfidx, max_tx_rate, + return edev->ops->iov->set_rate(edev->cdev, vfidx, min_tx_rate, max_tx_rate); } -- cgit v0.10.2 From ce2b885cc5014d1cbcbe519c45f00f6a59e5ab70 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 26 May 2016 11:01:18 +0300 Subject: qede: Reload on GRO changes Since driver is using a FW-based GRO implementation, this has some effects on its ability to cope with GRO enablement/disablement. As a result, driver must perform an inner-reload as a result of a state change in the offload configuration of said feature. [Failure to do so means network stack would continue to receive aggregated packets even though user requested the feature to be disabled]. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 3bb1428..5d00d14 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2091,6 +2091,29 @@ static void qede_vlan_mark_nonconfigured(struct qede_dev *edev) edev->accept_any_vlan = false; } +int qede_set_features(struct net_device *dev, netdev_features_t features) +{ + struct qede_dev *edev = netdev_priv(dev); + netdev_features_t changes = features ^ dev->features; + bool need_reload = false; + + /* No action needed if hardware GRO is disabled during driver load */ + if (changes & NETIF_F_GRO) { + if (dev->features & NETIF_F_GRO) + need_reload = !edev->gro_disable; + else + need_reload = edev->gro_disable; + } + + if (need_reload && netif_running(edev->ndev)) { + dev->features = features; + qede_reload(edev, NULL, NULL); + return 1; + } + + return 0; +} + #ifdef CONFIG_QEDE_VXLAN static void qede_add_vxlan_port(struct net_device *dev, sa_family_t sa_family, __be16 port) @@ -2175,6 +2198,7 @@ static const struct net_device_ops qede_netdev_ops = { #endif .ndo_vlan_rx_add_vid = qede_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = qede_vlan_rx_kill_vid, + .ndo_set_features = qede_set_features, .ndo_get_stats64 = qede_get_stats64, #ifdef CONFIG_QED_SRIOV .ndo_set_vf_link_state = qede_set_vf_link_state, -- cgit v0.10.2 From 6ecb0a0c0d9539fac1497d1d218e60a393a1070c Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 26 May 2016 11:01:19 +0300 Subject: qede: Don't expose self-test for VFs PFs and VFs differ in their registered ethtool operations, but they're using a common function for get_sset_count(). As a result, `ethtool -i' for a VF would indicate it supports selftest, although that's not the case. Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 1bc7535..ad3cae3 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -230,7 +230,10 @@ static int qede_get_sset_count(struct net_device *dev, int stringset) case ETH_SS_PRIV_FLAGS: return QEDE_PRI_FLAG_LEN; case ETH_SS_TEST: - return QEDE_ETHTOOL_TEST_MAX; + if (!IS_VF(edev)) + return QEDE_ETHTOOL_TEST_MAX; + else + return 0; default: DP_VERBOSE(edev, QED_MSG_DEBUG, "Unsupported stringset 0x%08x\n", stringset); -- cgit v0.10.2 From 795292916cf9d8c47c53b692bcb36b02953101cf Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Thu, 26 May 2016 11:01:20 +0300 Subject: qed: Fix allocation in interrupt context Commit 39651abd2814 ("qed: add support for dcbx") is re-configuring the QM hw-block as part of its sequence. This is done in attention handling context which is non-sleepable, yet memory is allocated in this flow using GFP_KERNEL. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 089016f..7aeed2f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -155,7 +155,7 @@ void qed_resc_free(struct qed_dev *cdev) } } -static int qed_init_qm_info(struct qed_hwfn *p_hwfn) +static int qed_init_qm_info(struct qed_hwfn *p_hwfn, bool b_sleepable) { u8 num_vports, vf_offset = 0, i, vport_id, num_ports, curr_queue = 0; struct qed_qm_info *qm_info = &p_hwfn->qm_info; @@ -182,23 +182,28 @@ static int qed_init_qm_info(struct qed_hwfn *p_hwfn) /* PQs will be arranged as follows: First per-TC PQ then pure-LB quete. */ - qm_info->qm_pq_params = kzalloc(sizeof(*qm_info->qm_pq_params) * - num_pqs, GFP_KERNEL); + qm_info->qm_pq_params = kcalloc(num_pqs, + sizeof(struct init_qm_pq_params), + b_sleepable ? GFP_KERNEL : GFP_ATOMIC); if (!qm_info->qm_pq_params) goto alloc_err; - qm_info->qm_vport_params = kzalloc(sizeof(*qm_info->qm_vport_params) * - num_vports, GFP_KERNEL); + qm_info->qm_vport_params = kcalloc(num_vports, + sizeof(struct init_qm_vport_params), + b_sleepable ? GFP_KERNEL + : GFP_ATOMIC); if (!qm_info->qm_vport_params) goto alloc_err; - qm_info->qm_port_params = kzalloc(sizeof(*qm_info->qm_port_params) * - MAX_NUM_PORTS, GFP_KERNEL); + qm_info->qm_port_params = kcalloc(MAX_NUM_PORTS, + sizeof(struct init_qm_port_params), + b_sleepable ? GFP_KERNEL + : GFP_ATOMIC); if (!qm_info->qm_port_params) goto alloc_err; - qm_info->wfq_data = kcalloc(num_vports, sizeof(*qm_info->wfq_data), - GFP_KERNEL); + qm_info->wfq_data = kcalloc(num_vports, sizeof(struct qed_wfq_data), + b_sleepable ? 
GFP_KERNEL : GFP_ATOMIC); if (!qm_info->wfq_data) goto alloc_err; @@ -299,7 +304,7 @@ int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) qed_qm_info_free(p_hwfn); /* initialize qed's qm data structure */ - rc = qed_init_qm_info(p_hwfn); + rc = qed_init_qm_info(p_hwfn, false); if (rc) return rc; @@ -388,7 +393,7 @@ int qed_resc_alloc(struct qed_dev *cdev) goto alloc_err; /* Prepare and process QM requirements */ - rc = qed_init_qm_info(p_hwfn); + rc = qed_init_qm_info(p_hwfn, true); if (rc) goto alloc_err; -- cgit v0.10.2 From cc3d5eb09111a471f942c76e9610ee962e1d4c31 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 26 May 2016 11:01:21 +0300 Subject: qed: Save min/max accross dcbx-change When DCBx re-negotiation is occurring, the PF's configurations for maximum and minimum bandwidth guarantees are currently lost. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 7aeed2f..920eadd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -161,6 +161,8 @@ static int qed_init_qm_info(struct qed_hwfn *p_hwfn, bool b_sleepable) struct qed_qm_info *qm_info = &p_hwfn->qm_info; struct init_qm_port_params *p_qm_port; u16 num_pqs, multi_cos_tcs = 1; + u8 pf_wfq = qm_info->pf_wfq; + u32 pf_rl = qm_info->pf_rl; u16 num_vfs = 0; #ifdef CONFIG_QED_SRIOV @@ -269,10 +271,10 @@ static int qed_init_qm_info(struct qed_hwfn *p_hwfn, bool b_sleepable) for (i = 0; i < qm_info->num_vports; i++) qm_info->qm_vport_params[i].vport_wfq = 1; - qm_info->pf_wfq = 0; - qm_info->pf_rl = 0; qm_info->vport_rl_en = 1; qm_info->vport_wfq_en = 1; + qm_info->pf_rl = pf_rl; + qm_info->pf_wfq = pf_wfq; return 0; -- cgit v0.10.2 From 1af9dcf7f90e6b75b7c42eaaf19cdd5da1354784 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 26 May 2016 11:01:22 +0300 Subject: qed: Add missing 100g init mode Some of the HW configurations are currently missing for 100g devices. This can cause various classification issues, as well as prevent device from fully reaching line-rate. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 920eadd..2a7c875 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -588,7 +588,14 @@ static void qed_calc_hw_mode(struct qed_hwfn *p_hwfn) hw_mode |= 1 << MODE_ASIC; + if (p_hwfn->cdev->num_hwfns > 1) + hw_mode |= 1 << MODE_100G; + p_hwfn->hw_info.hw_mode = hw_mode; + + DP_VERBOSE(p_hwfn, (NETIF_MSG_PROBE | NETIF_MSG_IFUP), + "Configuring function for hw_mode: 0x%08x\n", + p_hwfn->hw_info.hw_mode); } /* Init run time data for all PFs on an engine. */ -- cgit v0.10.2 From bb13ace7dca5d2385847e43511acf5777da35c0e Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Thu, 26 May 2016 11:01:23 +0300 Subject: qed: Prevent 100g from working in MSI Adapter can support 100g in both MSIx and INTa, but not in MSI. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 2a7c875..579c6d5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -835,6 +835,11 @@ int qed_hw_init(struct qed_dev *cdev, u32 load_code, param; int rc, mfw_rc, i; + if ((int_mode == QED_INT_MODE_MSI) && (cdev->num_hwfns > 1)) { + DP_NOTICE(cdev, "MSI mode is not supported for CMT devices\n"); + return -EINVAL; + } + if (IS_PF(cdev)) { rc = qed_init_fw_data(cdev, bin_fw_data); if (rc != 0) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 8b22f87..7530646 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -413,15 +413,17 @@ static int qed_set_int_mode(struct qed_dev *cdev, bool force_mode) /* Fallthrough */ case QED_INT_MODE_MSI: - rc = pci_enable_msi(cdev->pdev); - if (!rc) { - int_params->out.int_mode = QED_INT_MODE_MSI; - goto out; - } + if (cdev->num_hwfns == 1) { + rc = pci_enable_msi(cdev->pdev); + if (!rc) { + int_params->out.int_mode = QED_INT_MODE_MSI; + goto out; + } - DP_NOTICE(cdev, "Failed to enable MSI\n"); - if (force_mode) - goto out; + DP_NOTICE(cdev, "Failed to enable MSI\n"); + if (force_mode) + goto out; + } /* Fallthrough */ case QED_INT_MODE_INTA: -- cgit v0.10.2 From 3e7cfce228c6c67dd31e09175eaca55fee0c7082 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 26 May 2016 11:01:24 +0300 Subject: qed: Don't config min BW on 100g on link flap Currently 100g devices don't support minimum/maximum BW configurations, yet link flaps might cause the driver to attempt to do such a configuration. Prevent this just as we do for the maximum BW. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 579c6d5..2d89e8c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -2105,6 +2105,13 @@ void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev, u32 min_pf_rate) { int i; + if (cdev->num_hwfns > 1) { + DP_VERBOSE(cdev, + NETIF_MSG_LINK, + "WFQ configuration is not supported for this device\n"); + return; + } + for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; -- cgit v0.10.2 From 9791d8e7627d1c4dbf8819646833f2f576b4f8f3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 25 May 2016 16:50:45 +0200 Subject: ipv6: hide ip6_encap_hlen/ip6_tnl_encap definitions A recent cleanup moved MAX_IPTUN_ENCAP_OPS along with some other definitions, but it is now invisible when CONFIG_INET is not defined, but still referenced from ip6_tunnel.h: In file included from net/xfrm/xfrm_input.c:17:0: include/net/ip6_tunnel.h:67:17: error: 'MAX_IPTUN_ENCAP_OPS' undeclared here (not in a function) ip6tun_encaps[MAX_IPTUN_ENCAP_OPS]; ^~~~~~~~~~~~~~~~~~~ This hides the ip6_encap_hlen and ip6_tnl_encap functions inside of CONFIG_INET so we don't run into the the problem. Alternatively we could move the macro out of the #ifdef again to restore the previous behavior Signed-off-by: Arnd Bergmann Fixes: 55c2bc143224 ("net: Cleanup encap items in ip_tunnels.h") Signed-off-by: David S. 
Miller diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index d325c81..43a5a0e 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -63,6 +63,8 @@ struct ip6_tnl_encap_ops { u8 *protocol, struct flowi6 *fl6); }; +#ifdef CONFIG_INET + extern const struct ip6_tnl_encap_ops __rcu * ip6tun_encaps[MAX_IPTUN_ENCAP_OPS]; @@ -138,7 +140,6 @@ struct net *ip6_tnl_get_link_net(const struct net_device *dev); int ip6_tnl_get_iflink(const struct net_device *dev); int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu); -#ifdef CONFIG_INET static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, struct net_device *dev) { -- cgit v0.10.2 From fabb13db448efc23c47851550867fc46519de97e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 25 May 2016 16:50:46 +0200 Subject: fou: add Kconfig options for IPv6 support A previous patch added the fou6.ko module, but that failed to link in a couple of configurations: net/built-in.o: In function `ip6_tnl_encap_add_fou_ops': net/ipv6/fou6.c:88: undefined reference to `ip6_tnl_encap_add_ops' net/ipv6/fou6.c:94: undefined reference to `ip6_tnl_encap_add_ops' net/ipv6/fou6.c:97: undefined reference to `ip6_tnl_encap_del_ops' net/built-in.o: In function `ip6_tnl_encap_del_fou_ops': net/ipv6/fou6.c:106: undefined reference to `ip6_tnl_encap_del_ops' net/ipv6/fou6.c:107: undefined reference to `ip6_tnl_encap_del_ops' If CONFIG_IPV6=m, ip6_tnl_encap_add_ops/ip6_tnl_encap_del_ops are in a module, but fou6.c can still be built-in, and that obviously fails to link. Also, if CONFIG_IPV6=y, but CONFIG_IPV6_TUNNEL=m or CONFIG_IPV6_TUNNEL=n, the same problem happens for a different reason. This adds two new silent Kconfig symbols to work around both problems: - CONFIG_IPV6_FOU is now always set to 'm' if either CONFIG_NET_FOU=m or CONFIG_IPV6=m - CONFIG_IPV6_FOU_TUNNEL is set implicitly when IPV6_FOU is enabled and NET_FOU_IP_TUNNELS is also turned out, and it will ensure that CONFIG_IPV6_TUNNEL is also available. The options could be made user-visible as well, to give additional room for configuration, but it seems easier not to bother users with more choice here. Signed-off-by: Arnd Bergmann Fixes: aa3463d65e7b ("fou: Add encap ops for IPv6 tunnels") Signed-off-by: David S. Miller diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 3f84113..9946082 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -232,6 +232,15 @@ config IPV6_GRE Saying M here will produce a module called ip6_gre. If unsure, say N. 
+config IPV6_FOU + tristate + default NET_FOU && IPV6 + +config IPV6_FOU_TUNNEL + tristate + default NET_FOU_IP_TUNNELS && IPV6_FOU + select INET6_TUNNEL + config IPV6_MULTIPLE_TABLES bool "IPv6: Multiple Routing Tables" select FIB_RULES diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 7ec3129..d42ca3d 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -42,7 +42,7 @@ obj-$(CONFIG_IPV6_VTI) += ip6_vti.o obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o -obj-$(CONFIG_NET_FOU) += fou6.o +obj-$(CONFIG_NET_FOU_IPV6_TUNNELS) += fou6.o obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c index c972d0b..9ea249b 100644 --- a/net/ipv6/fou6.c +++ b/net/ipv6/fou6.c @@ -69,7 +69,7 @@ int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, } EXPORT_SYMBOL(gue6_build_header); -#ifdef CONFIG_NET_FOU_IP_TUNNELS +#if IS_ENABLED(CONFIG_IPV6_FOU_TUNNEL) static const struct ip6_tnl_encap_ops fou_ip6tun_ops = { .encap_hlen = fou_encap_hlen, -- cgit v0.10.2 From 6756325a9a1e2d36b6210f4a42f77501c917ebb9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 26 May 2016 09:46:22 +0300 Subject: ptp: oops in ptp_ioctl() If we pass ERR_PTR(-EFAULT) to kfree() then it's going to oops. Fixes: 2ece068e1b1d ('ptp: use memdup_user().') Signed-off-by: Dan Carpenter Acked-by: Richard Cochran Signed-off-by: David S. Miller diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 0b1ac6b..d637c93 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -211,6 +211,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) sysoff = memdup_user((void __user *)arg, sizeof(*sysoff)); if (IS_ERR(sysoff)) { err = PTR_ERR(sysoff); + sysoff = NULL; break; } if (sysoff->n_samples > PTP_MAX_SAMPLES) { -- cgit v0.10.2 From 0d08df6c493898e679d9c517e77ea95c063d40ec Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 26 May 2016 14:43:52 +0300 Subject: net/lapb: tuse %*ph to dump buffers Use %*ph specifier to dump small buffers in hex format instead doing this byte-by-byte. Signed-off-by: Andy Shevchenko Signed-off-by: David S. 
Miller diff --git a/net/lapb/lapb_in.c b/net/lapb/lapb_in.c index 5dba899..1824708 100644 --- a/net/lapb/lapb_in.c +++ b/net/lapb/lapb_in.c @@ -444,10 +444,9 @@ static void lapb_state3_machine(struct lapb_cb *lapb, struct sk_buff *skb, break; case LAPB_FRMR: - lapb_dbg(1, "(%p) S3 RX FRMR(%d) %02X %02X %02X %02X %02X\n", + lapb_dbg(1, "(%p) S3 RX FRMR(%d) %5ph\n", lapb->dev, frame->pf, - skb->data[0], skb->data[1], skb->data[2], - skb->data[3], skb->data[4]); + skb->data); lapb_establish_data_link(lapb); lapb_dbg(0, "(%p) S3 -> S1\n", lapb->dev); lapb_requeue_frames(lapb); diff --git a/net/lapb/lapb_out.c b/net/lapb/lapb_out.c index ba4d015..482c94d 100644 --- a/net/lapb/lapb_out.c +++ b/net/lapb/lapb_out.c @@ -148,9 +148,7 @@ void lapb_transmit_buffer(struct lapb_cb *lapb, struct sk_buff *skb, int type) } } - lapb_dbg(2, "(%p) S%d TX %02X %02X %02X\n", - lapb->dev, lapb->state, - skb->data[0], skb->data[1], skb->data[2]); + lapb_dbg(2, "(%p) S%d TX %3ph\n", lapb->dev, lapb->state, skb->data); if (!lapb_data_transmit(lapb, skb)) kfree_skb(skb); diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c index 9d0a426..3c1914d 100644 --- a/net/lapb/lapb_subr.c +++ b/net/lapb/lapb_subr.c @@ -113,9 +113,7 @@ int lapb_decode(struct lapb_cb *lapb, struct sk_buff *skb, { frame->type = LAPB_ILLEGAL; - lapb_dbg(2, "(%p) S%d RX %02X %02X %02X\n", - lapb->dev, lapb->state, - skb->data[0], skb->data[1], skb->data[2]); + lapb_dbg(2, "(%p) S%d RX %3ph\n", lapb->dev, lapb->state, skb->data); /* We always need to look at 2 bytes, sometimes we need * to look at 3 and those cases are handled below. @@ -284,10 +282,9 @@ void lapb_transmit_frmr(struct lapb_cb *lapb) dptr++; *dptr++ = lapb->frmr_type; - lapb_dbg(1, "(%p) S%d TX FRMR %02X %02X %02X %02X %02X\n", + lapb_dbg(1, "(%p) S%d TX FRMR %5ph\n", lapb->dev, lapb->state, - skb->data[1], skb->data[2], skb->data[3], - skb->data[4], skb->data[5]); + &skb->data[1]); } else { dptr = skb_put(skb, 4); *dptr++ = LAPB_FRMR; @@ -299,9 +296,8 @@ void lapb_transmit_frmr(struct lapb_cb *lapb) dptr++; *dptr++ = lapb->frmr_type; - lapb_dbg(1, "(%p) S%d TX FRMR %02X %02X %02X\n", - lapb->dev, lapb->state, skb->data[1], - skb->data[2], skb->data[3]); + lapb_dbg(1, "(%p) S%d TX FRMR %3ph\n", + lapb->dev, lapb->state, &skb->data[1]); } lapb_transmit_buffer(lapb, skb, LAPB_RESPONSE); -- cgit v0.10.2 From 86651650d16a359e4142c6a8b0467c87e48c4c94 Mon Sep 17 00:00:00 2001 From: Elad Kanfi Date: Thu, 26 May 2016 15:00:06 +0300 Subject: net: nps_enet: Disable interrupts before napi reschedule Since NAPI works by shutting down event interrupts when theres work and turning them on when theres none, the net driver must make sure that interrupts are disabled when it reschedules polling. By calling napi_reschedule, the driver switches to polling mode, therefor there should be no interrupt interference. Any received packets will be handled in nps_enet_poll by polling the HW indication of received packet until all packets are handled. Signed-off-by: Elad Kanfi Acked-by: Noam Camus Tested-by: Alexey Brodkin Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c index 085f912..06f0317 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.c +++ b/drivers/net/ethernet/ezchip/nps_enet.c @@ -205,8 +205,10 @@ static int nps_enet_poll(struct napi_struct *napi, int budget) * re-adding ourselves to the poll list. 
*/ - if (priv->tx_skb && !tx_ctrl_ct) + if (priv->tx_skb && !tx_ctrl_ct) { + nps_enet_reg_set(priv, NPS_ENET_REG_BUF_INT_ENABLE, 0); napi_reschedule(napi); + } } return work_done; -- cgit v0.10.2 From 421eeea10d0a5e52256bce9544de477938104fdb Mon Sep 17 00:00:00 2001 From: Baozeng Ding Date: Thu, 26 May 2016 21:07:42 +0800 Subject: ieee802154: fix logic error in ieee802154_llsec_parse_dev_addr Fix a logic error to avoid potential null pointer dereference. Signed-off-by: Baozeng Ding Reviewed-by: Stefan Schmidt Signed-off-by: David S. Miller diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index ca207db..116187b 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -1289,8 +1289,8 @@ ieee802154_llsec_parse_dev_addr(struct nlattr *nla, nl802154_dev_addr_policy)) return -EINVAL; - if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] && - !attrs[NL802154_DEV_ADDR_ATTR_MODE] && + if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] || + !attrs[NL802154_DEV_ADDR_ATTR_MODE] || !(attrs[NL802154_DEV_ADDR_ATTR_SHORT] || attrs[NL802154_DEV_ADDR_ATTR_EXTENDED])) return -EINVAL; -- cgit v0.10.2 From 68bb399e656f244d3d173a20a8280c167632fca8 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 26 May 2016 21:46:05 +0100 Subject: sfc: use flow dissector helpers for aRFS Signed-off-by: Edward Cree Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 8956995..9d6cc8b 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -842,33 +842,15 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, struct efx_nic *efx = netdev_priv(net_dev); struct efx_channel *channel; struct efx_filter_spec spec; - const __be16 *ports; - __be16 ether_type; - int nhoff; + struct flow_keys fk; int rc; - /* The core RPS/RFS code has already parsed and validated - * VLAN, IP and transport headers. We assume they are in the - * header area. - */ - - if (skb->protocol == htons(ETH_P_8021Q)) { - const struct vlan_hdr *vh = - (const struct vlan_hdr *)skb->data; - - /* We can't filter on the IP 5-tuple and the vlan - * together, so just strip the vlan header and filter - * on the IP part. 
- */ - EFX_BUG_ON_PARANOID(skb_headlen(skb) < sizeof(*vh)); - ether_type = vh->h_vlan_encapsulated_proto; - nhoff = sizeof(struct vlan_hdr); - } else { - ether_type = skb->protocol; - nhoff = 0; - } + if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) + return -EPROTONOSUPPORT; - if (ether_type != htons(ETH_P_IP) && ether_type != htons(ETH_P_IPV6)) + if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) + return -EPROTONOSUPPORT; + if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) return -EPROTONOSUPPORT; efx_filter_init_rx(&spec, EFX_FILTER_PRI_HINT, @@ -878,34 +860,19 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO | EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT | EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT; - spec.ether_type = ether_type; - - if (ether_type == htons(ETH_P_IP)) { - const struct iphdr *ip = - (const struct iphdr *)(skb->data + nhoff); - - EFX_BUG_ON_PARANOID(skb_headlen(skb) < nhoff + sizeof(*ip)); - if (ip_is_fragment(ip)) - return -EPROTONOSUPPORT; - spec.ip_proto = ip->protocol; - spec.rem_host[0] = ip->saddr; - spec.loc_host[0] = ip->daddr; - EFX_BUG_ON_PARANOID(skb_headlen(skb) < nhoff + 4 * ip->ihl + 4); - ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl); + spec.ether_type = fk.basic.n_proto; + spec.ip_proto = fk.basic.ip_proto; + + if (fk.basic.n_proto == htons(ETH_P_IP)) { + spec.rem_host[0] = fk.addrs.v4addrs.src; + spec.loc_host[0] = fk.addrs.v4addrs.dst; } else { - const struct ipv6hdr *ip6 = - (const struct ipv6hdr *)(skb->data + nhoff); - - EFX_BUG_ON_PARANOID(skb_headlen(skb) < - nhoff + sizeof(*ip6) + 4); - spec.ip_proto = ip6->nexthdr; - memcpy(spec.rem_host, &ip6->saddr, sizeof(ip6->saddr)); - memcpy(spec.loc_host, &ip6->daddr, sizeof(ip6->daddr)); - ports = (const __be16 *)(ip6 + 1); + memcpy(spec.rem_host, &fk.addrs.v6addrs.src, sizeof(struct in6_addr)); + memcpy(spec.loc_host, &fk.addrs.v6addrs.dst, sizeof(struct in6_addr)); } - spec.rem_port = ports[0]; - spec.loc_port = ports[1]; + spec.rem_port = fk.ports.src; + spec.loc_port = fk.ports.dst; rc = efx->type->filter_rfs_insert(efx, &spec); if (rc < 0) @@ -916,18 +883,18 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, channel = efx_get_channel(efx, skb_get_rx_queue(skb)); ++channel->rfs_filters_added; - if (ether_type == htons(ETH_P_IP)) + if (spec.ether_type == htons(ETH_P_IP)) netif_info(efx, rx_status, efx->net_dev, "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n", (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", - spec.rem_host, ntohs(ports[0]), spec.loc_host, - ntohs(ports[1]), rxq_index, flow_id, rc); + spec.rem_host, ntohs(spec.rem_port), spec.loc_host, + ntohs(spec.loc_port), rxq_index, flow_id, rc); else netif_info(efx, rx_status, efx->net_dev, "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n", (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", - spec.rem_host, ntohs(ports[0]), spec.loc_host, - ntohs(ports[1]), rxq_index, flow_id, rc); + spec.rem_host, ntohs(spec.rem_port), spec.loc_host, + ntohs(spec.loc_port), rxq_index, flow_id, rc); return rc; } -- cgit v0.10.2 From 176b346b37f0b9c03e91eb6f1460e00f3c0c3edf Mon Sep 17 00:00:00 2001 From: Eric Garver Date: Thu, 26 May 2016 12:28:05 -0400 Subject: Documentation: ip-sysctl.txt: clarify secure_redirects Clarify how secure_redirects works. Mention that RFC1122 always applies. Signed-off-by: Eric Garver Signed-off-by: David S. 
Miller diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 6c7f365b..9ae9293 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1036,15 +1036,17 @@ proxy_arp_pvlan - BOOLEAN shared_media - BOOLEAN Send(router) or accept(host) RFC1620 shared media redirects. - Overrides ip_secure_redirects. + Overrides secure_redirects. shared_media for the interface will be enabled if at least one of conf/{all,interface}/shared_media is set to TRUE, it will be disabled otherwise default TRUE secure_redirects - BOOLEAN - Accept ICMP redirect messages only for gateways, - listed in default gateway list. + Accept ICMP redirect messages only to gateways listed in the + interface's current gateway list. Even if disabled, RFC1122 redirect + rules still apply. + Overridden by shared_media. secure_redirects for the interface will be enabled if at least one of conf/{all,interface}/secure_redirects is set to TRUE, it will be disabled otherwise -- cgit v0.10.2 From 0e6b5259824e97a0f7e7b450421ff12865d3b0e2 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Thu, 26 May 2016 20:16:36 +0300 Subject: net: l2tp: Make l2tp_ip6 namespace aware l2tp_ip6 tunnel and session lookups were still using init_net, although the l2tp core infrastructure already supports lookups keyed by 'net'. As a result, l2tp_ip6_recv discarded packets for tunnels/sessions created in namespaces other than the init_net. Fix, by using dev_net(skb->dev) or sock_net(sk) where appropriate. Signed-off-by: Shmulik Ladkani Signed-off-by: David S. Miller diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index c6f5df1b..6c54e03 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -128,6 +128,7 @@ static inline struct sock *l2tp_ip6_bind_lookup(struct net *net, */ static int l2tp_ip6_recv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); struct sock *sk; u32 session_id; u32 tunnel_id; @@ -154,7 +155,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. 
*/ - session = l2tp_session_find(&init_net, NULL, session_id); + session = l2tp_session_find(net, NULL, session_id); if (session == NULL) goto discard; @@ -188,14 +189,14 @@ pass_up: goto discard; tunnel_id = ntohl(*(__be32 *) &skb->data[4]); - tunnel = l2tp_tunnel_find(&init_net, tunnel_id); + tunnel = l2tp_tunnel_find(net, tunnel_id); if (tunnel != NULL) sk = tunnel->sock; else { struct ipv6hdr *iph = ipv6_hdr(skb); read_lock_bh(&l2tp_ip6_lock); - sk = __l2tp_ip6_bind_lookup(&init_net, &iph->daddr, + sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, 0, tunnel_id); read_unlock_bh(&l2tp_ip6_lock); } @@ -263,6 +264,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_l2tpip6 *addr = (struct sockaddr_l2tpip6 *) uaddr; + struct net *net = sock_net(sk); __be32 v4addr = 0; int addr_type; int err; @@ -286,7 +288,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) err = -EADDRINUSE; read_lock_bh(&l2tp_ip6_lock); - if (__l2tp_ip6_bind_lookup(&init_net, &addr->l2tp_addr, + if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, sk->sk_bound_dev_if, addr->l2tp_conn_id)) goto out_in_use; read_unlock_bh(&l2tp_ip6_lock); @@ -456,7 +458,7 @@ static int l2tp_ip6_backlog_recv(struct sock *sk, struct sk_buff *skb) return 0; drop: - IP_INC_STATS(&init_net, IPSTATS_MIB_INDISCARDS); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS); kfree_skb(skb); return -1; } -- cgit v0.10.2 From c08376ac97cb202ec65320f3d90d5c4c5e2adb0b Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 26 May 2016 17:21:05 -0700 Subject: timer: Export destroy_hrtimer_on_stack() hrtimer_init_on_stack() needs a matching call to destroy_hrtimer_on_stack(), so both need to be exported. Signed-off-by: Guenter Roeck Signed-off-by: David S. Miller diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 8c7392c..e99df0f 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -425,6 +425,7 @@ void destroy_hrtimer_on_stack(struct hrtimer *timer) { debug_object_free(timer, &hrtimer_debug_descr); } +EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack); #else static inline void debug_hrtimer_init(struct hrtimer *timer) { } -- cgit v0.10.2 From bcf91bdb44d2e6d84ffc5b7ab0400d5ff1c27645 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 26 May 2016 17:21:06 -0700 Subject: net: pktgen: Call destroy_hrtimer_on_stack() If CONFIG_DEBUG_OBJECTS_TIMERS=y, hrtimer_init_on_stack() requires a matching call to destroy_hrtimer_on_stack() to clean up timer debug objects. Signed-off-by: Guenter Roeck Signed-off-by: David S. 
Miller diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8604ae2..8b02df0 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2245,10 +2245,8 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) hrtimer_set_expires(&t.timer, spin_until); remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer)); - if (remaining <= 0) { - pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); - return; - } + if (remaining <= 0) + goto out; start_time = ktime_get(); if (remaining < 100000) { @@ -2273,7 +2271,9 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) } pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); +out: pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); + destroy_hrtimer_on_stack(&t.timer); } static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) -- cgit v0.10.2 From ce577668a426c6a9e2470a09dcd07fbd6e45272a Mon Sep 17 00:00:00 2001 From: Chen Haiquan Date: Fri, 27 May 2016 10:49:11 +0800 Subject: vxlan: Accept user specified MTU value when create new vxlan link When create a new vxlan link, example: ip link add vtap mtu 1440 type vxlan vni 1 dev eth0 The argument "mtu" has no effect, because it is not set to conf->mtu. The default value is used in vxlan_dev_configure function. This problem was introduced by commit 0dfbdf4102b9 (vxlan: Factor out device configuration). Fixes: 0dfbdf4102b9 (vxlan: Factor out device configuration) Signed-off-by: Chen Haiquan Acked-by: Cong Wang Signed-off-by: David S. Miller diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 8ff30c3..f999db2 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3086,6 +3086,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL; + if (tb[IFLA_MTU]) + conf.mtu = nla_get_u32(tb[IFLA_MTU]); + err = vxlan_dev_configure(src_net, dev, &conf); switch (err) { case -ENODEV: -- cgit v0.10.2 From 86f04396ff6d36146ec335d429191a7c8e2209af Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 27 May 2016 13:33:50 +0300 Subject: atm: firestream: add more reserved strings This bug was there when the driver was first added in back in year 2000. It causes a Smatch warning: drivers/atm/firestream.c:849 process_incoming() error: buffer overflow 'res_strings' 60 <= 63 There are supposed to be 64 entries in this array and the missing strings are clearly in the 30 40 range. I added them as reserved 37 to reserved 40. It's possible that strings are really supposed to be added in the middle instead of at the end, but this approach is safe, in that it fixes the bug and doesn't break anything that wasn't already broken. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index a969a7e..85aaf22 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -181,13 +181,17 @@ static char *res_strings[] = { "reserved 27", "reserved 28", "reserved 29", - "reserved 30", + "reserved 30", /* FIXME: The strings between 30-40 might be wrong. 
*/ "reassembly abort: no buffers", "receive buffer overflow", "change in GFC", "receive buffer full", "low priority discard - no receive descriptor", "low priority discard - missing end of packet", + "reserved 37", + "reserved 38", + "reserved 39", + "reseverd 40", "reserved 41", "reserved 42", "reserved 43", -- cgit v0.10.2 From f2633d2eaaab773ea8b29cea3785cf0f8a8872a5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 27 May 2016 13:34:35 +0300 Subject: atm: iphase: off by one in rx_pkt() The iadev->rx_open[] array holds "iadev->num_vc" pointers (this code assumes that pointers are 32 bits). So the > here should be >= or else we could end up reading a garbage pointer from one element beyond the end of the array. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index 7d00f29..809dd1e 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -1128,7 +1128,7 @@ static int rx_pkt(struct atm_dev *dev) /* make the ptr point to the corresponding buffer desc entry */ buf_desc_ptr += desc; if (!desc || (desc > iadev->num_rx_desc) || - ((buf_desc_ptr->vc_index & 0xffff) > iadev->num_vc)) { + ((buf_desc_ptr->vc_index & 0xffff) >= iadev->num_vc)) { free_desc(dev, desc); IF_ERR(printk("IA: bad descriptor desc = %d \n", desc);) return -1; -- cgit v0.10.2 From 308453aa9156a3b8ee382c0949befb507a32b0c1 Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Fri, 27 May 2016 17:45:07 +0100 Subject: vlan: Propagate MAC address to VLANs The MAC address of the physical interface is only copied to the VLAN when it is first created, resulting in an inconsistency after MAC address changes of only newly created VLANs having an up-to-date MAC. The VLANs should continue inheriting the MAC address of the physical interface until the VLAN MAC address is explicitly set to any value. This allows IPv6 EUI64 addresses for the VLAN to reflect any changes to the MAC of the physical interface and thus for DAD to behave as expected. Signed-off-by: Mike Manning Signed-off-by: David S. 
Miller diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index a1e273a..82a116b 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -290,6 +290,10 @@ static void vlan_sync_address(struct net_device *dev, if (ether_addr_equal(vlan->real_dev_addr, dev->dev_addr)) return; + /* vlan continues to inherit address of lower device */ + if (vlan_dev_inherit_address(vlandev, dev)) + goto out; + /* vlan address was different from the old address and is equal to * the new address */ if (!ether_addr_equal(vlandev->dev_addr, vlan->real_dev_addr) && @@ -302,6 +306,7 @@ static void vlan_sync_address(struct net_device *dev, !ether_addr_equal(vlandev->dev_addr, dev->dev_addr)) dev_uc_add(dev, vlandev->dev_addr); +out: ether_addr_copy(vlan->real_dev_addr, dev->dev_addr); } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 9d010a09..cc15579 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -109,6 +109,8 @@ int vlan_check_real_dev(struct net_device *real_dev, void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev); void unregister_vlan_dev(struct net_device *dev, struct list_head *head); +bool vlan_dev_inherit_address(struct net_device *dev, + struct net_device *real_dev); static inline u32 vlan_get_ingress_priority(struct net_device *dev, u16 vlan_tci) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index e7e6257..86ae75b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -245,6 +245,17 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result) strncpy(result, vlan_dev_priv(dev)->real_dev->name, 23); } +bool vlan_dev_inherit_address(struct net_device *dev, + struct net_device *real_dev) +{ + if (dev->addr_assign_type != NET_ADDR_STOLEN) + return false; + + ether_addr_copy(dev->dev_addr, real_dev->dev_addr); + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + return true; +} + static int vlan_dev_open(struct net_device *dev) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); @@ -255,7 +266,8 @@ static int vlan_dev_open(struct net_device *dev) !(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) return -ENETDOWN; - if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr)) { + if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr) && + !vlan_dev_inherit_address(dev, real_dev)) { err = dev_uc_add(real_dev, dev->dev_addr); if (err < 0) goto out; @@ -560,8 +572,10 @@ static int vlan_dev_init(struct net_device *dev) /* ipv6 shared card related stuff */ dev->dev_id = real_dev->dev_id; - if (is_zero_ether_addr(dev->dev_addr)) - eth_hw_addr_inherit(dev, real_dev); + if (is_zero_ether_addr(dev->dev_addr)) { + ether_addr_copy(dev->dev_addr, real_dev->dev_addr); + dev->addr_assign_type = NET_ADDR_STOLEN; + } if (is_zero_ether_addr(dev->broadcast)) memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); -- cgit v0.10.2 From 7fafe803749a0c238feb5d38cb4ddee53eca5aa0 Mon Sep 17 00:00:00 2001 From: Troy Kisky Date: Fri, 27 May 2016 13:30:40 -0700 Subject: net: fec: update dirty_tx even if no skb If dirty_tx isn't updated, then dma_unmap_single can be called twice. This fixes a [ 58.420980] ------------[ cut here ]------------ [ 58.425667] WARNING: CPU: 0 PID: 377 at /home/schurig/d/mkarm/linux-4.5/lib/dma-debug.c:1096 check_unmap+0x9d0/0xab8() [ 58.436405] fec 2188000.ethernet: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x0000000000000000] [size=66 bytes] encountered by Holger Signed-off-by: Troy Kisky Tested-by: Acked-by: Fugang Duan Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index ca2cccc..3c0255e 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1197,10 +1197,8 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) fec16_to_cpu(bdp->cbd_datlen), DMA_TO_DEVICE); bdp->cbd_bufaddr = cpu_to_fec32(0); - if (!skb) { - bdp = fec_enet_get_nextdesc(bdp, &txq->bd); - continue; - } + if (!skb) + goto skb_done; /* Check for errors. */ if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC | @@ -1239,7 +1237,7 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) /* Free the sk buffer associated with this last transmit */ dev_kfree_skb_any(skb); - +skb_done: /* Make sure the update to bdp and tx_skbuff are performed * before dirty_tx */ -- cgit v0.10.2 From 40eb90e9ccc3f96f937ea1db79d0f9cb61553ed5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 29 May 2016 17:42:13 +0800 Subject: sctp: sctp_diag should dump sctp socket type Now we cannot distinguish that one sk is a udp or sctp style when we use ss to dump sctp_info. it's necessary to dump it as well. For sctp_diag, ss support is not officially available, thus there are no official users of this yet, so we can add this field in the middle of sctp_info without breaking user API. v1->v2: - move 'sctpi_s_type' field to the end of struct sctp_info, so that it won't cause incompatibility with applications already built. - add __reserved3 in sctp_info to make sure sctp_info is 8-byte alignment. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/include/linux/sctp.h b/include/linux/sctp.h index dacb5e7..de1f643 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -765,6 +765,8 @@ struct sctp_info { __u8 sctpi_s_disable_fragments; __u8 sctpi_s_v4mapped; __u8 sctpi_s_frag_interleave; + __u32 sctpi_s_type; + __u32 __reserved3; }; struct sctp_infox { diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 777d032..67154b8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4220,6 +4220,7 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, info->sctpi_s_disable_fragments = sp->disable_fragments; info->sctpi_s_v4mapped = sp->v4mapped; info->sctpi_s_frag_interleave = sp->frag_interleave; + info->sctpi_s_type = sp->type; return 0; } -- cgit v0.10.2 From bc7cc5999fd392cc799630d7e375b2f4e29cc398 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 30 May 2016 14:04:25 +0900 Subject: openvswitch: update checksum in {push,pop}_mpls In the case of CHECKSUM_COMPLETE the skb checksum should be updated in {push,pop}_mpls() as they the type in the ethernet header. As suggested by Pravin Shelar. Cc: Pravin Shelar Fixes: 25cd9ba0abc0 ("openvswitch: Add basic MPLS support to kernel") Signed-off-by: Simon Horman Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 879185f..9a3eb7a 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -137,11 +137,23 @@ static bool is_flow_key_valid(const struct sw_flow_key *key) return !!key->eth.type; } +static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, + __be16 ethertype) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __be16 diff[] = { ~(hdr->h_proto), ethertype }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), + ~skb->csum); + } + + hdr->h_proto = ethertype; +} + static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_mpls *mpls) { __be32 *new_mpls_lse; - struct ethhdr *hdr; /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */ if (skb->encapsulation) @@ -160,9 +172,7 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); - hdr = eth_hdr(skb); - hdr->h_proto = mpls->mpls_ethertype; - + update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype); if (!skb->inner_protocol) skb_set_inner_protocol(skb, skb->protocol); skb->protocol = mpls->mpls_ethertype; @@ -193,7 +203,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, * field correctly in the presence of VLAN tags. */ hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN); - hdr->h_proto = ethertype; + update_ethertype(skb, hdr, ethertype); if (eth_p_mpls(skb->protocol)) skb->protocol = ethertype; -- cgit v0.10.2 From 95e4daa82086e2bd7b7163f33a4f455bf8ecdf48 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 31 May 2016 22:42:11 +0200 Subject: fou: fix IPv6 Kconfig options The Kconfig options I added to work around broken compilation ended up screwing up things more, as I used the wrong symbol to control compilation of the file, resulting in IPv6 fou support to never be built into the kernel. Changing CONFIG_NET_FOU_IPV6_TUNNELS to CONFIG_IPV6_FOU fixes that problem, I had renamed the symbol in one location but not the other, and as the file is never being used by other kernel code, this did not lead to a build failure that I would have caught. After that fix, another issue with the same patch becomes obvious, as we 'select INET6_TUNNEL', which is related to IPV6_TUNNEL, but not the same, and this can still cause the original build failure when IPV6_TUNNEL is not built-in but IPV6_FOU is. The fix is equally trivial, we just need to select the right symbol. I have successfully build 350 randconfig kernels with this patch and verified that the driver is now being built. Signed-off-by: Arnd Bergmann Reported-by: Valentin Rothberg Fixes: fabb13db448e ("fou: add Kconfig options for IPv6 support") Signed-off-by: David S. 
Miller diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 9946082..2343e4f 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -239,7 +239,7 @@ config IPV6_FOU config IPV6_FOU_TUNNEL tristate default NET_FOU_IP_TUNNELS && IPV6_FOU - select INET6_TUNNEL + select IPV6_TUNNEL config IPV6_MULTIPLE_TABLES bool "IPv6: Multiple Routing Tables" diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index d42ca3d..6d8ea09 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -42,7 +42,7 @@ obj-$(CONFIG_IPV6_VTI) += ip6_vti.o obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o -obj-$(CONFIG_NET_FOU_IPV6_TUNNELS) += fou6.o +obj-$(CONFIG_IPV6_FOU) += fou6.o obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) -- cgit v0.10.2 From bae5499cc55d2329ea0fbf09cb22298f4ca2f9bd Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 30 May 2016 15:00:54 +0200 Subject: bnx2x: avoid leaking memory on bnx2x_init_one() failures bnx2x_init_bp() allocates memory with bnx2x_alloc_mem_bp() so if we fail later in bnx2x_init_one() we need to free this memory with bnx2x_free_mem_bp() to avoid leakages. E.g. I'm observing memory leaks reported by kmemleak when a failure (unrelated) happens in bnx2x_vfpf_acquire(). Signed-off-by: Vitaly Kuznetsov Acked-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 0a5b770..c5fe9158 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13941,14 +13941,14 @@ static int bnx2x_init_one(struct pci_dev *pdev, bp->doorbells = bnx2x_vf_doorbells(bp); rc = bnx2x_vf_pci_alloc(bp); if (rc) - goto init_one_exit; + goto init_one_freemem; } else { doorbell_size = BNX2X_L2_MAX_CID(bp) * (1 << BNX2X_DB_SHIFT); if (doorbell_size > pci_resource_len(pdev, 2)) { dev_err(&bp->pdev->dev, "Cannot map doorbells, bar size too small, aborting\n"); rc = -ENOMEM; - goto init_one_exit; + goto init_one_freemem; } bp->doorbells = ioremap_nocache(pci_resource_start(pdev, 2), doorbell_size); @@ -13957,19 +13957,19 @@ static int bnx2x_init_one(struct pci_dev *pdev, dev_err(&bp->pdev->dev, "Cannot map doorbell space, aborting\n"); rc = -ENOMEM; - goto init_one_exit; + goto init_one_freemem; } if (IS_VF(bp)) { rc = bnx2x_vfpf_acquire(bp, tx_count, rx_count); if (rc) - goto init_one_exit; + goto init_one_freemem; } /* Enable SRIOV if capability found in configuration space */ rc = bnx2x_iov_init_one(bp, int_mode, BNX2X_MAX_NUM_OF_VFS); if (rc) - goto init_one_exit; + goto init_one_freemem; /* calc qm_cid_count */ bp->qm_cid_count = bnx2x_set_qm_cid_count(bp); @@ -13988,7 +13988,7 @@ static int bnx2x_init_one(struct pci_dev *pdev, rc = bnx2x_set_int_mode(bp); if (rc) { dev_err(&pdev->dev, "Cannot set interrupts\n"); - goto init_one_exit; + goto init_one_freemem; } BNX2X_DEV_INFO("set interrupts successfully\n"); @@ -13996,7 +13996,7 @@ static int bnx2x_init_one(struct pci_dev *pdev, rc = register_netdev(dev); if (rc) { dev_err(&pdev->dev, "Cannot register net device\n"); - goto init_one_exit; + goto init_one_freemem; } BNX2X_DEV_INFO("device name after netdev register %s\n", dev->name); @@ -14029,6 +14029,9 @@ static int bnx2x_init_one(struct pci_dev *pdev, return 0; +init_one_freemem: + bnx2x_free_mem_bp(bp); + init_one_exit: bnx2x_disable_pcie_error_reporting(bp); -- cgit v0.10.2 From 
f00e35e259948b995aa1f3ee7fddb05f34a50157 Mon Sep 17 00:00:00 2001 From: wangyunjian Date: Tue, 31 May 2016 11:52:43 +0800 Subject: virtio_net: fix virtnet_open and virtnet_probe competing for try_fill_recv In function virtnet_open() and virtnet_probe(), func try_fill_recv() may be executed at the same time. VQ in virtqueue_add() has not been protected well and BUG_ON will be triggered when virito_net.ko being removed. Signed-off-by: Yunjian Wang Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 49d84e5..e0638e5 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1925,24 +1925,11 @@ static int virtnet_probe(struct virtio_device *vdev) virtio_device_ready(vdev); - /* Last of all, set up some receive buffers. */ - for (i = 0; i < vi->curr_queue_pairs; i++) { - try_fill_recv(vi, &vi->rq[i], GFP_KERNEL); - - /* If we didn't even get one input buffer, we're useless. */ - if (vi->rq[i].vq->num_free == - virtqueue_get_vring_size(vi->rq[i].vq)) { - free_unused_bufs(vi); - err = -ENOMEM; - goto free_recv_bufs; - } - } - vi->nb.notifier_call = &virtnet_cpu_callback; err = register_hotcpu_notifier(&vi->nb); if (err) { pr_debug("virtio_net: registering cpu notifier failed\n"); - goto free_recv_bufs; + goto free_unregister_netdev; } /* Assume link up if device can't report link status, @@ -1960,10 +1947,9 @@ static int virtnet_probe(struct virtio_device *vdev) return 0; -free_recv_bufs: +free_unregister_netdev: vi->vdev->config->reset(vdev); - free_receive_bufs(vi); unregister_netdev(dev); free_vqs: cancel_delayed_work_sync(&vi->refill); -- cgit v0.10.2 From d69d169493463e6b1da9a1965d35126e479aa27f Mon Sep 17 00:00:00 2001 From: Christoph Fritz Date: Thu, 26 May 2016 04:06:47 +0200 Subject: usbnet: smsc95xx: fix link detection for disabled autonegotiation To detect link status up/down for connections where autonegotiation is explicitly disabled, we don't get an irq but need to poll the status register for link up/down detection. This patch adds a workqueue to poll for link status. Signed-off-by: Christoph Fritz Signed-off-by: David S. 
Miller diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index d9d2806..dc989a8 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -61,6 +61,8 @@ #define SUSPEND_ALLMODES (SUSPEND_SUSPEND0 | SUSPEND_SUSPEND1 | \ SUSPEND_SUSPEND2 | SUSPEND_SUSPEND3) +#define CARRIER_CHECK_DELAY (2 * HZ) + struct smsc95xx_priv { u32 mac_cr; u32 hash_hi; @@ -69,6 +71,9 @@ struct smsc95xx_priv { spinlock_t mac_cr_lock; u8 features; u8 suspend_flags; + bool link_ok; + struct delayed_work carrier_check; + struct usbnet *dev; }; static bool turbo_mode = true; @@ -624,6 +629,44 @@ static void smsc95xx_status(struct usbnet *dev, struct urb *urb) intdata); } +static void set_carrier(struct usbnet *dev, bool link) +{ + struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); + + if (pdata->link_ok == link) + return; + + pdata->link_ok = link; + + if (link) + usbnet_link_change(dev, 1, 0); + else + usbnet_link_change(dev, 0, 0); +} + +static void check_carrier(struct work_struct *work) +{ + struct smsc95xx_priv *pdata = container_of(work, struct smsc95xx_priv, + carrier_check.work); + struct usbnet *dev = pdata->dev; + int ret; + + if (pdata->suspend_flags != 0) + return; + + ret = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, MII_BMSR); + if (ret < 0) { + netdev_warn(dev->net, "Failed to read MII_BMSR\n"); + return; + } + if (ret & BMSR_LSTATUS) + set_carrier(dev, 1); + else + set_carrier(dev, 0); + + schedule_delayed_work(&pdata->carrier_check, CARRIER_CHECK_DELAY); +} + /* Enable or disable Tx & Rx checksum offload engines */ static int smsc95xx_set_features(struct net_device *netdev, netdev_features_t features) @@ -1165,13 +1208,20 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->flags |= IFF_MULTICAST; dev->net->hard_header_len += SMSC95XX_TX_OVERHEAD_CSUM; dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; + + pdata->dev = dev; + INIT_DELAYED_WORK(&pdata->carrier_check, check_carrier); + schedule_delayed_work(&pdata->carrier_check, CARRIER_CHECK_DELAY); + return 0; } static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf) { struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); + if (pdata) { + cancel_delayed_work(&pdata->carrier_check); netif_dbg(dev, ifdown, dev->net, "free pdata\n"); kfree(pdata); pdata = NULL; @@ -1695,6 +1745,7 @@ static int smsc95xx_resume(struct usb_interface *intf) /* do this first to ensure it's cleared even in error case */ pdata->suspend_flags = 0; + schedule_delayed_work(&pdata->carrier_check, CARRIER_CHECK_DELAY); if (suspend_flags & SUSPEND_ALLMODES) { /* clear wake-up sources */ -- cgit v0.10.2 From faf8dcc12c2780bfec61eb95e5ad74af2ff4f82f Mon Sep 17 00:00:00 2001 From: Jon Cooper Date: Tue, 31 May 2016 19:12:32 +0100 Subject: sfc: Track RPS flow IDs per channel instead of per function Otherwise we get confused when two flows on different channels get the same flow ID. Signed-off-by: Edward Cree Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 0705ec86..097f363 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1726,14 +1726,33 @@ static int efx_probe_filters(struct efx_nic *efx) #ifdef CONFIG_RFS_ACCEL if (efx->type->offload_features & NETIF_F_NTUPLE) { - efx->rps_flow_id = kcalloc(efx->type->max_rx_ip_filters, - sizeof(*efx->rps_flow_id), - GFP_KERNEL); - if (!efx->rps_flow_id) { + struct efx_channel *channel; + int i, success = 1; + + efx_for_each_channel(channel, efx) { + channel->rps_flow_id = + kcalloc(efx->type->max_rx_ip_filters, + sizeof(*channel->rps_flow_id), + GFP_KERNEL); + if (!channel->rps_flow_id) + success = 0; + else + for (i = 0; + i < efx->type->max_rx_ip_filters; + ++i) + channel->rps_flow_id[i] = + RPS_FLOW_ID_INVALID; + } + + if (!success) { + efx_for_each_channel(channel, efx) + kfree(channel->rps_flow_id); efx->type->filter_table_remove(efx); rc = -ENOMEM; goto out_unlock; } + + efx->rps_expire_index = efx->rps_expire_channel = 0; } #endif out_unlock: @@ -1744,7 +1763,10 @@ out_unlock: static void efx_remove_filters(struct efx_nic *efx) { #ifdef CONFIG_RFS_ACCEL - kfree(efx->rps_flow_id); + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + kfree(channel->rps_flow_id); #endif down_write(&efx->filter_sem); efx->type->filter_table_remove(efx); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 38c4223..d13ddf9 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -403,6 +403,8 @@ enum efx_sync_events_state { * @event_test_cpu: Last CPU to handle interrupt or test event for this channel * @irq_count: Number of IRQs since last adaptive moderation decision * @irq_mod_score: IRQ moderation score + * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS, + * indexed by filter ID * @n_rx_tobe_disc: Count of RX_TOBE_DISC errors * @n_rx_ip_hdr_chksum_err: Count of RX IP header checksum errors * @n_rx_tcp_udp_chksum_err: Count of RX TCP and UDP checksum errors @@ -446,6 +448,8 @@ struct efx_channel { unsigned int irq_mod_score; #ifdef CONFIG_RFS_ACCEL unsigned int rfs_filters_added; +#define RPS_FLOW_ID_INVALID 0xFFFFFFFF + u32 *rps_flow_id; #endif unsigned n_rx_tobe_disc; @@ -889,9 +893,9 @@ struct vfdi_status; * @filter_sem: Filter table rw_semaphore, for freeing the table * @filter_lock: Filter table lock, for mere content changes * @filter_state: Architecture-dependent filter table state - * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS, - * indexed by filter ID - * @rps_expire_index: Next index to check for expiry in @rps_flow_id + * @rps_expire_channel: Next channel to check for expiry + * @rps_expire_index: Next index to check for expiry in + * @rps_expire_channel's @rps_flow_id * @active_queues: Count of RX and TX queues that haven't been flushed and drained. * @rxq_flush_pending: Count of number of receive queues that need to be flushed. * Decremented when the efx_flush_rx_queue() is called. 
@@ -1035,7 +1039,7 @@ struct efx_nic { spinlock_t filter_lock; void *filter_state; #ifdef CONFIG_RFS_ACCEL - u32 *rps_flow_id; + unsigned int rps_expire_channel; unsigned int rps_expire_index; #endif diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 9d6cc8b..02b0b527 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -845,6 +845,9 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, struct flow_keys fk; int rc; + if (flow_id == RPS_FLOW_ID_INVALID) + return -EINVAL; + if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) return -EPROTONOSUPPORT; @@ -879,8 +882,8 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, return rc; /* Remember this so we can check whether to expire the filter later */ - efx->rps_flow_id[rc] = flow_id; - channel = efx_get_channel(efx, skb_get_rx_queue(skb)); + channel = efx_get_channel(efx, rxq_index); + channel->rps_flow_id[rc] = flow_id; ++channel->rfs_filters_added; if (spec.ether_type == htons(ETH_P_IP)) @@ -902,24 +905,34 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota) { bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index); - unsigned int index, size; + unsigned int channel_idx, index, size; u32 flow_id; if (!spin_trylock_bh(&efx->filter_lock)) return false; expire_one = efx->type->filter_rfs_expire_one; + channel_idx = efx->rps_expire_channel; index = efx->rps_expire_index; size = efx->type->max_rx_ip_filters; while (quota--) { - flow_id = efx->rps_flow_id[index]; - if (expire_one(efx, flow_id, index)) + struct efx_channel *channel = efx_get_channel(efx, channel_idx); + flow_id = channel->rps_flow_id[index]; + + if (flow_id != RPS_FLOW_ID_INVALID && + expire_one(efx, flow_id, index)) { netif_info(efx, rx_status, efx->net_dev, - "expired filter %d [flow %u]\n", - index, flow_id); - if (++index == size) + "expired filter %d [queue %u flow %u]\n", + index, channel_idx, flow_id); + channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID; + } + if (++index == size) { + if (++channel_idx == efx->n_channels) + channel_idx = 0; index = 0; + } } + efx->rps_expire_channel = channel_idx; efx->rps_expire_index = index; spin_unlock_bh(&efx->filter_lock); -- cgit v0.10.2
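The virtio_net fix above works by making ndo_open the only path that posts receive buffers, so probe and open can no longer race on the same virtqueue. Below is a minimal kernel-style sketch of that single-owner pattern; struct example_priv, example_fill_rx() and the other example_* names are made up for illustration and are not the real virtio_net symbols.

#include <linux/netdevice.h>
#include <linux/workqueue.h>

/* Illustrative driver state -- not the layout used by virtio_net. */
struct example_priv {
        struct net_device *netdev;
        struct delayed_work refill;     /* refill retried from a worker */
        unsigned int num_queues;
};

/* Post receive buffers for queue @q; return false if the ring is not full. */
static bool example_fill_rx(struct example_priv *priv, unsigned int q)
{
        /* driver-specific buffer posting would go here */
        return true;
}

/* probe: set up and register the netdev, but leave the RX rings alone. */
static int example_probe(struct example_priv *priv)
{
        return register_netdev(priv->netdev);
}

/* ndo_open: the single owner that (re)fills the RX rings. */
static int example_open(struct net_device *netdev)
{
        struct example_priv *priv = netdev_priv(netdev);
        unsigned int q;

        for (q = 0; q < priv->num_queues; q++)
                if (!example_fill_rx(priv, q))
                        schedule_delayed_work(&priv->refill, HZ / 2);
        return 0;
}

This mirrors the direction of the patch: probe stops touching the receive virtqueues, and its error path only has to reset the device and unregister the netdev.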
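The smsc95xx change polls the PHY from a self-rearming delayed work because no link-change interrupt arrives when autonegotiation is off. The sketch below shows the same delayed-work pattern in isolation; the poll_* names, the read_link_up() helper and the use of netif_carrier_on/off() are assumptions for illustration (the real patch reads MII_BMSR via smsc95xx_mdio_read() and calls usbnet_link_change()).

#include <linux/netdevice.h>
#include <linux/workqueue.h>

#define POLL_INTERVAL   (2 * HZ)        /* same 2 second cadence as the patch */

/* Illustrative private data -- not the smsc95xx structure layout. */
struct poll_priv {
        struct net_device *netdev;
        struct delayed_work carrier_check;
        bool link_ok;
};

/* Stand-in for the MII_BMSR read / BMSR_LSTATUS test in the real driver. */
static bool read_link_up(struct poll_priv *priv)
{
        return true;
}

static void carrier_check_work(struct work_struct *work)
{
        struct poll_priv *priv =
                container_of(work, struct poll_priv, carrier_check.work);
        bool link = read_link_up(priv);

        if (link != priv->link_ok) {
                priv->link_ok = link;
                if (link)
                        netif_carrier_on(priv->netdev);
                else
                        netif_carrier_off(priv->netdev);
        }
        /* re-arm: polling continues until explicitly cancelled */
        schedule_delayed_work(&priv->carrier_check, POLL_INTERVAL);
}

/* called once from bind */
static void poll_start(struct poll_priv *priv)
{
        INIT_DELAYED_WORK(&priv->carrier_check, carrier_check_work);
        schedule_delayed_work(&priv->carrier_check, POLL_INTERVAL);
}

/* called from unbind (resume simply re-schedules the work, as in the patch) */
static void poll_stop(struct poll_priv *priv)
{
        cancel_delayed_work_sync(&priv->carrier_check);
}

Using cancel_delayed_work_sync() at teardown is a choice made in this sketch because the handler re-arms itself; the in-tree patch uses cancel_delayed_work() in unbind.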
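The sfc patch replaces the single per-function rps_flow_id array with one array per channel, an "invalid" sentinel for unused slots, and a two-level (channel, index) cursor for the expiry scan. A condensed sketch of that bookkeeping follows; the example_* names are illustrative stand-ins rather than the efx structures.

#include <linux/types.h>

#define EXAMPLE_FLOW_ID_INVALID 0xFFFFFFFFu

struct example_channel {
        u32 *flow_id;                   /* one slot per hardware filter index */
};

struct example_nic {
        struct example_channel *channel;
        unsigned int n_channels;
        unsigned int n_filters;
        unsigned int expire_channel;    /* round-robin cursor: which channel */
        unsigned int expire_index;      /* round-robin cursor: which filter slot */
};

/* Stand-in for the driver callback that tries to expire one filter. */
static bool example_try_expire(struct example_nic *nic, unsigned int channel,
                               unsigned int index, u32 flow_id)
{
        return true;
}

/* Scan up to @quota (channel, index) slots, remembering where we stopped. */
static void example_expire_scan(struct example_nic *nic, unsigned int quota)
{
        unsigned int c = nic->expire_channel;
        unsigned int i = nic->expire_index;

        while (quota--) {
                u32 flow = nic->channel[c].flow_id[i];

                if (flow != EXAMPLE_FLOW_ID_INVALID &&
                    example_try_expire(nic, c, i, flow))
                        nic->channel[c].flow_id[i] = EXAMPLE_FLOW_ID_INVALID;

                if (++i == nic->n_filters) {
                        i = 0;
                        if (++c == nic->n_channels)
                                c = 0;
                }
        }
        nic->expire_channel = c;
        nic->expire_index = i;
}

Keying the table by channel means two flows that happen to get the same RPS flow ID on different channels no longer overwrite each other's slot, and the sentinel keeps never-populated slots from being treated as live filters during expiry.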