From e77e6ff502ea3d193872b5b9033bfd9717b36447 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 15 Aug 2016 21:50:35 +0800 Subject: netfilter: conntrack: do not dump other netns's conntrack entries via proc We should skip the conntracks that belong to a different namespace, otherwise other unrelated netns's conntrack entries will be dumped via /proc/net/nf_conntrack. Fixes: 56d52d4892d0 ("netfilter: conntrack: use a single hashtable for all namespaces") Signed-off-by: Liping Zhang Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 958a145..9f267c3 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -205,6 +205,7 @@ static int ct_seq_show(struct seq_file *s, void *v) struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); const struct nf_conntrack_l3proto *l3proto; const struct nf_conntrack_l4proto *l4proto; + struct net *net = seq_file_net(s); int ret = 0; NF_CT_ASSERT(ct); @@ -215,6 +216,9 @@ static int ct_seq_show(struct seq_file *s, void *v) if (NF_CT_DIRECTION(hash)) goto release; + if (!net_eq(nf_ct_net(ct), net)) + goto release; + l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); NF_CT_ASSERT(l3proto); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); -- cgit v0.10.2 From 2497b84625466dc57b8c3a40cd41a659fe04cca6 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 13 Aug 2016 22:46:04 +0800 Subject: netfilter: nfnetlink_log: add "nf-logger-3-1" module alias name Otherwise, if nfnetlink_log.ko is not loaded, we cannot add rules to log packets to the userspace when we specify it with arp family, such as: # nft add rule arp filter input log group 0 :1:1-37: Error: Could not process rule: No such file or directory add rule arp filter input log group 0 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index cbcfdfb..6577db5 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1147,6 +1147,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG); MODULE_ALIAS_NF_LOGGER(AF_INET, 1); MODULE_ALIAS_NF_LOGGER(AF_INET6, 1); MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1); +MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */ module_init(nfnetlink_log_init); module_exit(nfnetlink_log_fini); -- cgit v0.10.2 From aca300183ed4f723837f6619facff0890c46d313 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 13 Aug 2016 23:13:02 +0800 Subject: netfilter: nfnetlink_acct: report overquota to the right netns We should report the over quota message to the right net namespace instead of the init netns. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h index 80ca889..664da00 100644 --- a/include/linux/netfilter/nfnetlink_acct.h +++ b/include/linux/netfilter/nfnetlink_acct.h @@ -15,6 +15,6 @@ struct nf_acct; struct nf_acct *nfnl_acct_find_get(struct net *net, const char *filter_name); void nfnl_acct_put(struct nf_acct *acct); void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct); -extern int nfnl_acct_overquota(const struct sk_buff *skb, - struct nf_acct *nfacct); +int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb, + struct nf_acct *nfacct); #endif /* _NFNL_ACCT_H */ diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 1b4de4b..796605b 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -443,7 +443,7 @@ void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct) } EXPORT_SYMBOL_GPL(nfnl_acct_update); -static void nfnl_overquota_report(struct nf_acct *nfacct) +static void nfnl_overquota_report(struct net *net, struct nf_acct *nfacct) { int ret; struct sk_buff *skb; @@ -458,11 +458,12 @@ static void nfnl_overquota_report(struct nf_acct *nfacct) kfree_skb(skb); return; } - netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA, + netlink_broadcast(net->nfnl, skb, 0, NFNLGRP_ACCT_QUOTA, GFP_ATOMIC); } -int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct) +int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb, + struct nf_acct *nfacct) { u64 now; u64 *quota; @@ -480,7 +481,7 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct) if (now >= *quota && !test_and_set_bit(NFACCT_OVERQUOTA_BIT, &nfacct->flags)) { - nfnl_overquota_report(nfacct); + nfnl_overquota_report(net, nfacct); } return ret; diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index 3048a7e..cf32759 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -26,7 +26,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) nfnl_acct_update(skb, info->nfacct); - overquota = nfnl_acct_overquota(skb, info->nfacct); + overquota = nfnl_acct_overquota(par->net, skb, info->nfacct); return overquota == NFACCT_UNDERQUOTA ? false : true; } -- cgit v0.10.2 From dcbe35909c8426e1ace74b4b99c4cb403cdaca89 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Aug 2016 09:56:46 -0700 Subject: netfilter: tproxy: properly refcount tcp listeners inet_lookup_listener() and inet6_lookup_listener() no longer take a reference on the found listener. This minimal patch adds back the refcounting, but we might do this differently in net-next later. Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") Reported-and-tested-by: Denys Fedoryshchenko Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 7f4414d..663c4c3 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -127,6 +127,8 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, daddr, dport, in->ifindex); + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; /* NOTE: we return listeners even if bound to * 0.0.0.0, those are filtered out in * xt_socket, since xt_TPROXY needs 0 bound @@ -195,6 +197,8 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, daddr, ntohs(dport), in->ifindex); + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; /* NOTE: we return listeners even if bound to * 0.0.0.0, those are filtered out in * xt_socket, since xt_TPROXY needs 0 bound -- cgit v0.10.2 From 12be15dd5ac928b60323b1ed8f6facd7335bb2cc Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 13 Aug 2016 23:13:01 +0800 Subject: netfilter: nfnetlink_acct: fix race between nfacct del and xt_nfacct destroy Suppose that we input the following commands at first: # nfacct add test # iptables -A INPUT -m nfacct --nfacct-name test And now "test" acct's refcnt is 2, but later when we try to delete the "test" nfacct and the related iptables rule at the same time, race maybe happen: CPU0 CPU1 nfnl_acct_try_del nfnl_acct_put atomic_dec_and_test //ref=1,testfail - - atomic_dec_and_test //ref=0,testok - kfree_rcu atomic_inc //ref=1 - So after the rcu grace period, nf_acct will be freed but it is still linked in the nfnl_acct_list, and we can access it later, then oops will happen. Convert atomic_dec_and_test and atomic_inc combinaiton to one atomic operation atomic_cmpxchg here to fix this problem. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 796605b..70eb2f6a 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -326,14 +326,14 @@ static int nfnl_acct_try_del(struct nf_acct *cur) { int ret = 0; - /* we want to avoid races with nfnl_acct_find_get. */ - if (atomic_dec_and_test(&cur->refcnt)) { + /* We want to avoid races with nfnl_acct_put. So only when the current + * refcnt is 1, we decrease it to 0. + */ + if (atomic_cmpxchg(&cur->refcnt, 1, 0) == 1) { /* We are protected by nfnl mutex. */ list_del_rcu(&cur->head); kfree_rcu(cur, rcu_head); } else { - /* still in use, restore reference counter. */ - atomic_inc(&cur->refcnt); ret = -EBUSY; } return ret; -- cgit v0.10.2 From b75911b66ad508a3c3f006ce37d9f9ebee34da43 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Thu, 18 Aug 2016 20:39:05 +0800 Subject: netfilter: cttimeout: fix use after free error when delete netns In general, when we want to delete a netns, cttimeout_net_exit will be called before ipt_unregister_table, i.e. before ctnl_timeout_put. But after call kfree_rcu in cttimeout_net_exit, we will still decrease the timeout object's refcnt in ctnl_timeout_put, this is incorrect, and will cause a use after free error. It is easy to reproduce this problem: # while : ; do ip netns add xxx ip netns exec xxx nfct add timeout testx inet icmp timeout 200 ip netns exec xxx iptables -t raw -p icmp -I OUTPUT -j CT --timeout testx ip netns del xxx done ======================================================================= BUG kmalloc-96 (Tainted: G B E ): Poison overwritten ----------------------------------------------------------------------- INFO: 0xffff88002b5161e8-0xffff88002b5161e8. First byte 0x6a instead of 0x6b INFO: Allocated in cttimeout_new_timeout+0xd4/0x240 [nfnetlink_cttimeout] age=104 cpu=0 pid=3330 ___slab_alloc+0x4da/0x540 __slab_alloc+0x20/0x40 __kmalloc+0x1c8/0x240 cttimeout_new_timeout+0xd4/0x240 [nfnetlink_cttimeout] nfnetlink_rcv_msg+0x21a/0x230 [nfnetlink] [ ... ] So only when the refcnt decreased to 0, we call kfree_rcu to free the timeout object. And like nfnetlink_acct do, use atomic_cmpxchg to avoid race between ctnl_timeout_try_del and ctnl_timeout_put. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 4cdcd96..68216cd 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -330,16 +330,16 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) { int ret = 0; - /* we want to avoid races with nf_ct_timeout_find_get. */ - if (atomic_dec_and_test(&timeout->refcnt)) { + /* We want to avoid races with ctnl_timeout_put. So only when the + * current refcnt is 1, we decrease it to 0. + */ + if (atomic_cmpxchg(&timeout->refcnt, 1, 0) == 1) { /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); nf_ct_l4proto_put(timeout->l4proto); ctnl_untimeout(net, timeout); kfree_rcu(timeout, rcu_head); } else { - /* still in use, restore reference counter. */ - atomic_inc(&timeout->refcnt); ret = -EBUSY; } return ret; @@ -543,7 +543,9 @@ err: static void ctnl_timeout_put(struct ctnl_timeout *timeout) { - atomic_dec(&timeout->refcnt); + if (atomic_dec_and_test(&timeout->refcnt)) + kfree_rcu(timeout, rcu_head); + module_put(THIS_MODULE); } #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ @@ -591,7 +593,9 @@ static void __net_exit cttimeout_net_exit(struct net *net) list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { list_del_rcu(&cur->head); nf_ct_l4proto_put(cur->l4proto); - kfree_rcu(cur, rcu_head); + + if (atomic_dec_and_test(&cur->refcnt)) + kfree_rcu(cur, rcu_head); } } -- cgit v0.10.2 From 039a392733600d35c80d406a98151b2a9a0a74b4 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Tue, 16 Aug 2016 18:40:18 +0300 Subject: qede: Fix Tx timeout due to xmit_more Driver uses netif_tx_queue_stopped() to make sure the xmit_more indication will be honored, but that only checks for DRV_XOFF. At the same time, it's possible that during transmission the DQL will close the transmission queue with STACK_XOFF indication. In re-configuration flows, when the threshold is relatively low, it's possible that the device has no pending tranmissions, and during tranmission the driver would miss doorbelling the HW. Since there are no pending transmission, there will never be a Tx completion [and thus the DQL would not remove the STACK_XOFF indication], eventually causing the Tx queue to timeout. While we're at it - also doorbell in case driver has to close the transmission queue on its own [although this one is less important - if the ring is full, we're bound to receive completion eventually, which means the doorbell would only be postponed and not indefinetly blocked]. Fixes: 312e06761c99 ("qede: Utilize xmit_more") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index e4bd02e..a6eb6af 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -722,11 +722,14 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, txq->tx_db.data.bd_prod = cpu_to_le16(qed_chain_get_prod_idx(&txq->tx_pbl)); - if (!skb->xmit_more || netif_tx_queue_stopped(netdev_txq)) + if (!skb->xmit_more || netif_xmit_stopped(netdev_txq)) qede_update_tx_producer(txq); if (unlikely(qed_chain_get_elem_left(&txq->tx_pbl) < (MAX_SKB_FRAGS + 1))) { + if (skb->xmit_more) + qede_update_tx_producer(txq); + netif_tx_stop_queue(netdev_txq); DP_VERBOSE(edev, NETIF_MSG_TX_QUEUED, "Stop queue was called\n"); -- cgit v0.10.2 From 1423661fed2c40d6d71b5e2e3aa390f85157f9d5 Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 16 Aug 2016 13:30:36 -0700 Subject: net: thunderx: Fix OOPs with ethtool --register-dump The ethtool_ops .get_regs function attempts to read the nonexistent register NIC_QSET_SQ_0_7_CNM_CHG, which produces a "bus error" type OOPs. Fix by not attempting to read, and removing the definition of, NIC_QSET_SQ_0_7_CNM_CHG. A zero is written into the register dump to keep the layout unchanged. Signed-off-by: David Daney Cc: # 4.4.x- Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h index afb10e3..fab35a5 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_reg.h +++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h @@ -170,7 +170,6 @@ #define NIC_QSET_SQ_0_7_DOOR (0x010838) #define NIC_QSET_SQ_0_7_STATUS (0x010840) #define NIC_QSET_SQ_0_7_DEBUG (0x010848) -#define NIC_QSET_SQ_0_7_CNM_CHG (0x010860) #define NIC_QSET_SQ_0_7_STAT_0_1 (0x010900) #define NIC_QSET_RBDR_0_1_CFG (0x010C00) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index d2d8ef2..ad4fddb 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -382,7 +382,10 @@ static void nicvf_get_regs(struct net_device *dev, p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DOOR, q); p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STATUS, q); p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DEBUG, q); - p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CNM_CHG, q); + /* Padding, was NIC_QSET_SQ_0_7_CNM_CHG, which + * produces bus errors when read + */ + p[i++] = 0; p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1, q); reg_offset = NIC_QSET_SQ_0_7_STAT_0_1 | (1 << 3); p[i++] = nicvf_queue_reg_read(nic, reg_offset, q); -- cgit v0.10.2 From e0d8b2908696d30583ae5764e33332e71cbbccc9 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Wed, 17 Aug 2016 14:09:28 +0530 Subject: cxgb4: Fixes resource allocation for ULD's in kdump kernel At present the code to check in kdump kernel was not disabling allocation of resources when CONFIG_CHELSIO_T4_DCB is defined, move the code outside #defines so that it gets disabled irrespective of #define, when in kdump kernel. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index c45de49..c762a8c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4335,6 +4335,11 @@ static void cfg_queues(struct adapter *adap) #endif int ciq_size; + /* Reduce memory usage in kdump environment, disable all offload. + */ + if (is_kdump_kernel()) + adap->params.offload = 0; + for_each_port(adap, i) n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg); #ifdef CONFIG_CHELSIO_T4_DCB @@ -4365,11 +4370,6 @@ static void cfg_queues(struct adapter *adap) if (q10g > netif_get_num_default_rss_queues()) q10g = netif_get_num_default_rss_queues(); - /* Reduce memory usage in kdump environment, disable all offload. - */ - if (is_kdump_kernel()) - adap->params.offload = 0; - for_each_port(adap, i) { struct port_info *pi = adap2pinfo(adap, i); -- cgit v0.10.2 From bb1fceca22492109be12640d49f5ea5a544c6bb4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Aug 2016 05:56:26 -0700 Subject: tcp: fix use after free in tcp_xmit_retransmit_queue() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When tcp_sendmsg() allocates a fresh and empty skb, it puts it at the tail of the write queue using tcp_add_write_queue_tail() Then it attempts to copy user data into this fresh skb. If the copy fails, we undo the work and remove the fresh skb. Unfortunately, this undo lacks the change done to tp->highest_sack and we can leave a dangling pointer (to a freed skb) Later, tcp_xmit_retransmit_queue() can dereference this pointer and access freed memory. For regular kernels where memory is not unmapped, this might cause SACK bugs because tcp_highest_sack_seq() is buggy, returning garbage instead of tp->snd_nxt, but with various debug features like CONFIG_DEBUG_PAGEALLOC, this can crash the kernel. This bug was found by Marco Grassi thanks to syzkaller. Fixes: 6859d49475d4 ("[TCP]: Abstract tp->highest_sack accessing & point to next skb") Reported-by: Marco Grassi Signed-off-by: Eric Dumazet Cc: Ilpo Järvinen Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Neal Cardwell Reviewed-by: Cong Wang Signed-off-by: David S. Miller diff --git a/include/net/tcp.h b/include/net/tcp.h index c00e7d5..7717302 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1523,6 +1523,8 @@ static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unli { if (sk->sk_send_head == skb_unlinked) sk->sk_send_head = NULL; + if (tcp_sk(sk)->highest_sack == skb_unlinked) + tcp_sk(sk)->highest_sack = NULL; } static inline void tcp_init_send_head(struct sock *sk) -- cgit v0.10.2 From b9f63ae7ba2de2ba19137c5757c0607ce40f3ed5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 17 Aug 2016 15:37:14 +0200 Subject: net: bgmac: fix reversed check for MII registration error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was failing on successful registration returning meaningless errors. Signed-off-by: Rafał Miłecki Fixes: 55954f3bfdac ("net: ethernet: bgmac: move BCMA MDIO Phy code into a separate file") Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index 9a9745c4..625235d 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -159,7 +159,7 @@ static int bgmac_probe(struct bcma_device *core) if (!bgmac_is_bcm4707_family(core)) { mii_bus = bcma_mdio_mii_register(core, bgmac->phyaddr); - if (!IS_ERR(mii_bus)) { + if (IS_ERR(mii_bus)) { err = PTR_ERR(mii_bus); goto err; } -- cgit v0.10.2 From 60bcabd080f53561efa9288be45c128feda1a8bb Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 17 Aug 2016 15:51:55 +0200 Subject: kaweth: fix firmware download This fixes the oops discovered by the Umap2 project and Alan Stern. The intf member needs to be set before the firmware is downloaded. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 770212b..37bf715 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -1029,6 +1029,7 @@ static int kaweth_probe( kaweth = netdev_priv(netdev); kaweth->dev = udev; kaweth->net = netdev; + kaweth->intf = intf; spin_lock_init(&kaweth->device_lock); init_waitqueue_head(&kaweth->term_wait); @@ -1139,8 +1140,6 @@ err_fw: dev_dbg(dev, "Initializing net device.\n"); - kaweth->intf = intf; - kaweth->tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!kaweth->tx_urb) goto err_free_netdev; -- cgit v0.10.2 From 575ced7f8090c1a4e91e2daf8da9352a6a1fc7a7 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 17 Aug 2016 15:51:56 +0200 Subject: kaweth: fix oops upon failed memory allocation Just return an error upon failure. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 37bf715..528b9c9 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -1009,6 +1009,7 @@ static int kaweth_probe( struct net_device *netdev; const eth_addr_t bcast_addr = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; int result = 0; + int rv = -EIO; dev_dbg(dev, "Kawasaki Device Probe (Device number:%d): 0x%4.4x:0x%4.4x:0x%4.4x\n", @@ -1049,6 +1050,10 @@ static int kaweth_probe( /* Download the firmware */ dev_info(dev, "Downloading firmware...\n"); kaweth->firmware_buf = (__u8 *)__get_free_page(GFP_KERNEL); + if (!kaweth->firmware_buf) { + rv = -ENOMEM; + goto err_free_netdev; + } if ((result = kaweth_download_firmware(kaweth, "kaweth/new_code.bin", 100, @@ -1203,7 +1208,7 @@ err_only_tx: err_free_netdev: free_netdev(netdev); - return -EIO; + return rv; } /**************************************************************** -- cgit v0.10.2 From 98a384eca9c147f890b5ea31ae91da3769e47e07 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Thu, 18 Aug 2016 12:33:28 +0800 Subject: fib_trie: Fix the description of pos and bits 1) Fix one typo: s/tn/tp/ 2) Fix the description about the "u" bits. Signed-off-by: Xunlei Pang Acked-by: Alexander Duyck Signed-off-by: David S. Miller diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index febca0f..e2ffc2a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -249,7 +249,7 @@ static inline unsigned long get_index(t_key key, struct key_vector *kv) * index into the parent's child array. That is, they will be used to find * 'n' among tp's children. * - * The bits from (n->pos + n->bits) to (tn->pos - 1) - "S" - are skipped bits + * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits * for the node n. * * All the bits we have seen so far are significant to the node n. The rest @@ -258,7 +258,7 @@ static inline unsigned long get_index(t_key key, struct key_vector *kv) * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into * n's child array, and will of course be different for each child. * - * The rest of the bits, from 0 to (n->pos + n->bits), are completely unknown + * The rest of the bits, from 0 to (n->pos -1) - "u" - are completely unknown * at this point. */ -- cgit v0.10.2 From 13f9bba7cd4f1665e4091143950a54e3f2984b07 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 18 Aug 2016 21:09:02 +0300 Subject: net/mlx5e: Set port MTU on netdev creation rather on open Port mtu shouldn't be written to hardware on every single interface open. Here we set it only when needed, on change_mtu and netdevice creation. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 870bea3..0fc3a2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1826,10 +1826,6 @@ int mlx5e_open_locked(struct net_device *netdev) netif_set_real_num_tx_queues(netdev, num_txqs); netif_set_real_num_rx_queues(netdev, priv->params.num_channels); - err = mlx5e_set_dev_port_mtu(netdev); - if (err) - goto err_clear_state_opened_flag; - err = mlx5e_open_channels(priv); if (err) { netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n", @@ -2593,6 +2589,7 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) mlx5e_close_locked(netdev); netdev->mtu = new_mtu; + mlx5e_set_dev_port_mtu(netdev); if (was_opened) err = mlx5e_open_locked(netdev); @@ -3463,6 +3460,8 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, mlx5e_init_l2_addr(priv); + mlx5e_set_dev_port_mtu(netdev); + err = register_netdev(netdev); if (err) { mlx5_core_err(mdev, "register_netdev failed, %d\n", err); -- cgit v0.10.2 From 506753b0b40997756d73f841c884a018f52401a0 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 18 Aug 2016 21:09:03 +0300 Subject: net/mlx5e: Optimization for MTU change Avoid unnecessary interface down/up operations upon an MTU change when it does not affect the rings configuration. Fixes: 461017cb006a ("net/mlx5e: Support RX multi-packet WQE (Striding RQ)") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0fc3a2b..65258b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2569,6 +2569,7 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) u16 max_mtu; u16 min_mtu; int err = 0; + bool reset; mlx5_query_port_max_mtu(mdev, &max_mtu, 1); @@ -2584,14 +2585,18 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) mutex_lock(&priv->state_lock); + reset = !priv->params.lro_en && + (priv->params.rq_wq_type != + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) + if (was_opened && reset) mlx5e_close_locked(netdev); netdev->mtu = new_mtu; mlx5e_set_dev_port_mtu(netdev); - if (was_opened) + if (was_opened && reset) err = mlx5e_open_locked(netdev); mutex_unlock(&priv->state_lock); -- cgit v0.10.2 From 1061c90f524963a0a90e7d2f9a6bfa666458af51 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 18 Aug 2016 21:09:04 +0300 Subject: net/mlx5: Fix pci error recovery flow When PCI error is detected we should save the state of the pci prior to disabling it. Also when receiving pci slot reset call we need to verify that the device is responsive. Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 4f491d4..2385bae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1420,36 +1420,12 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, dev_info(&pdev->dev, "%s was called\n", __func__); mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv); + pci_save_state(pdev); mlx5_pci_disable_device(dev); return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; } -static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) -{ - struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - int err = 0; - - dev_info(&pdev->dev, "%s was called\n", __func__); - - err = mlx5_pci_enable_device(dev); - if (err) { - dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n" - , __func__, err); - return PCI_ERS_RESULT_DISCONNECT; - } - pci_set_master(pdev); - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - - return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; -} - -void mlx5_disable_device(struct mlx5_core_dev *dev) -{ - mlx5_pci_err_detected(dev->pdev, 0); -} - /* wait for the device to show vital signs by waiting * for the health counter to start counting. */ @@ -1477,21 +1453,44 @@ static int wait_vital(struct pci_dev *pdev) return -ETIMEDOUT; } -static void mlx5_pci_resume(struct pci_dev *pdev) +static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_priv *priv = &dev->priv; int err; dev_info(&pdev->dev, "%s was called\n", __func__); - pci_save_state(pdev); - err = wait_vital(pdev); + err = mlx5_pci_enable_device(dev); if (err) { + dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n" + , __func__, err); + return PCI_ERS_RESULT_DISCONNECT; + } + + pci_set_master(pdev); + pci_restore_state(pdev); + + if (wait_vital(pdev)) { dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__); - return; + return PCI_ERS_RESULT_DISCONNECT; } + return PCI_ERS_RESULT_RECOVERED; +} + +void mlx5_disable_device(struct mlx5_core_dev *dev) +{ + mlx5_pci_err_detected(dev->pdev, 0); +} + +static void mlx5_pci_resume(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_priv *priv = &dev->priv; + int err; + + dev_info(&pdev->dev, "%s was called\n", __func__); + err = mlx5_load_one(dev, priv); if (err) dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n" -- cgit v0.10.2 From 2c0f8ce1b584a4d7b8ff53140d21dfed99834940 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 18 Aug 2016 21:09:05 +0300 Subject: net/mlx5: Added missing check of msg length in verifying its signature Set and verify signature calculates the signature for each of the mailbox nodes, even for those that are unused (from cache). Added a missing length check to set and verify only those which are used. While here, also moved the setting of msg's nodes token to where we already go over them. This saves a pass because checksum is disabled, and the only useful thing remaining that set signature does is setting the token. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index d6e2a1c..c2ec01a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -143,13 +143,14 @@ static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) return cmd->cmd_buf + (idx << cmd->log_stride); } -static u8 xor8_buf(void *buf, int len) +static u8 xor8_buf(void *buf, size_t offset, int len) { u8 *ptr = buf; u8 sum = 0; int i; + int end = len + offset; - for (i = 0; i < len; i++) + for (i = offset; i < end; i++) sum ^= ptr[i]; return sum; @@ -157,41 +158,49 @@ static u8 xor8_buf(void *buf, int len) static int verify_block_sig(struct mlx5_cmd_prot_block *block) { - if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff) + size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); + int xor_len = sizeof(*block) - sizeof(block->data) - 1; + + if (xor8_buf(block, rsvd0_off, xor_len) != 0xff) return -EINVAL; - if (xor8_buf(block, sizeof(*block)) != 0xff) + if (xor8_buf(block, 0, sizeof(*block)) != 0xff) return -EINVAL; return 0; } -static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token, - int csum) +static void calc_block_sig(struct mlx5_cmd_prot_block *block) { - block->token = token; - if (csum) { - block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - - sizeof(block->data) - 2); - block->sig = ~xor8_buf(block, sizeof(*block) - 1); - } + int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2; + size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); + + block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len); + block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1); } -static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum) +static void calc_chain_sig(struct mlx5_cmd_msg *msg) { struct mlx5_cmd_mailbox *next = msg->next; - - while (next) { - calc_block_sig(next->buf, token, csum); + int size = msg->len; + int blen = size - min_t(int, sizeof(msg->first.data), size); + int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) + / MLX5_CMD_DATA_BLOCK_SIZE; + int i = 0; + + for (i = 0; i < n && next; i++) { + calc_block_sig(next->buf); next = next->next; } } static void set_signature(struct mlx5_cmd_work_ent *ent, int csum) { - ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay)); - calc_chain_sig(ent->in, ent->token, csum); - calc_chain_sig(ent->out, ent->token, csum); + ent->lay->sig = ~xor8_buf(ent->lay, 0, sizeof(*ent->lay)); + if (csum) { + calc_chain_sig(ent->in); + calc_chain_sig(ent->out); + } } static void poll_timeout(struct mlx5_cmd_work_ent *ent) @@ -222,12 +231,17 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent) struct mlx5_cmd_mailbox *next = ent->out->next; int err; u8 sig; + int size = ent->out->len; + int blen = size - min_t(int, sizeof(ent->out->first.data), size); + int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) + / MLX5_CMD_DATA_BLOCK_SIZE; + int i = 0; - sig = xor8_buf(ent->lay, sizeof(*ent->lay)); + sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay)); if (sig != 0xff) return -EINVAL; - while (next) { + for (i = 0; i < n && next; i++) { err = verify_block_sig(next->buf); if (err) return err; @@ -656,7 +670,6 @@ static void cmd_work_handler(struct work_struct *work) spin_unlock_irqrestore(&cmd->alloc_lock, flags); } - ent->token = alloc_token(cmd); cmd->ent_arr[ent->idx] = ent; lay = get_inst(cmd, ent->idx); ent->lay = lay; @@ -766,7 +779,8 @@ static u8 *get_status_ptr(struct mlx5_outbox_hdr *out) static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, struct mlx5_cmd_msg *out, void *uout, int uout_size, mlx5_cmd_cbk_t callback, - void *context, int page_queue, u8 *status) + void *context, int page_queue, u8 *status, + u8 token) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -783,6 +797,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, if (IS_ERR(ent)) return PTR_ERR(ent); + ent->token = token; + if (!callback) init_completion(&ent->done); @@ -854,7 +870,8 @@ static const struct file_operations fops = { .write = dbg_write, }; -static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size) +static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size, + u8 token) { struct mlx5_cmd_prot_block *block; struct mlx5_cmd_mailbox *next; @@ -880,6 +897,7 @@ static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size) memcpy(block->data, from, copy); from += copy; size -= copy; + block->token = token; next = next->next; } @@ -949,7 +967,8 @@ static void free_cmd_box(struct mlx5_core_dev *dev, } static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev, - gfp_t flags, int size) + gfp_t flags, int size, + u8 token) { struct mlx5_cmd_mailbox *tmp, *head = NULL; struct mlx5_cmd_prot_block *block; @@ -978,6 +997,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev, tmp->next = head; block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0); block->block_num = cpu_to_be32(n - i - 1); + block->token = token; head = tmp; } msg->next = head; @@ -1352,7 +1372,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, } if (IS_ERR(msg)) - msg = mlx5_alloc_cmd_msg(dev, gfp, in_size); + msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0); return msg; } @@ -1377,6 +1397,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int err; u8 status = 0; u32 drv_synd; + u8 token; if (pci_channel_offline(dev->pdev) || dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { @@ -1395,20 +1416,22 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, return err; } - err = mlx5_copy_to_msg(inb, in, in_size); + token = alloc_token(&dev->cmd); + + err = mlx5_copy_to_msg(inb, in, in_size, token); if (err) { mlx5_core_warn(dev, "err %d\n", err); goto out_in; } - outb = mlx5_alloc_cmd_msg(dev, gfp, out_size); + outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token); if (IS_ERR(outb)) { err = PTR_ERR(outb); goto out_in; } err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, - pages_queue, &status); + pages_queue, &status, token); if (err) goto out_out; @@ -1476,7 +1499,7 @@ static int create_msg_cache(struct mlx5_core_dev *dev) INIT_LIST_HEAD(&cmd->cache.med.head); for (i = 0; i < NUM_LONG_LISTS; i++) { - msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE); + msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE, 0); if (IS_ERR(msg)) { err = PTR_ERR(msg); goto ex_err; @@ -1486,7 +1509,7 @@ static int create_msg_cache(struct mlx5_core_dev *dev) } for (i = 0; i < NUM_MED_LISTS; i++) { - msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE); + msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE, 0); if (IS_ERR(msg)) { err = PTR_ERR(msg); goto ex_err; -- cgit v0.10.2 From 6c3b4f90861c7ed59d0287b0ff7f2623f9d93d73 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 18 Aug 2016 21:09:06 +0300 Subject: net/mlx5: Update last-use statistics for flow rules Set lastuse statistic, when number of packets is changed compared to last query. This was wrongly dropped when bulk counter reading was added. Fixes: a351a1b03bf1 ('net/mlx5: Introduce bulk reading of flow counters') Signed-off-by: Amir Vadai Reported-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index c2877e9..3a9195b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -126,12 +126,21 @@ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev, for (node = &first->node; node; node = rb_next(node)) { struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node); struct mlx5_fc_cache *c = &counter->cache; + u64 packets; + u64 bytes; if (counter->id > last_id) break; mlx5_cmd_fc_bulk_get(dev, b, - counter->id, &c->packets, &c->bytes); + counter->id, &packets, &bytes); + + if (c->packets == packets) + continue; + + c->packets = packets; + c->bytes = bytes; + c->lastuse = jiffies; } out: -- cgit v0.10.2 From 1dbd0d373ac338903d27fab5204b13122cc5accd Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 18 Aug 2016 21:09:07 +0300 Subject: net/mlx5e: Use correct flow dissector key on flower offloading The wrong key is used when extracting the address type field set by the flower offload code. We have to use the control key and not the basic key, fix that. Fixes: e3a2b7ed018e ('net/mlx5e: Support offload cls_flower with drop action') Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index dc8b1cb..22cfc4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -170,7 +170,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_dissector_key_control *key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, + FLOW_DISSECTOR_KEY_CONTROL, f->key); addr_type = key->addr_type; } -- cgit v0.10.2 From dbe413e3bb93e0634f6d8d00b01cda6f141e0acd Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 18 Aug 2016 21:09:08 +0300 Subject: net/mlx5e: Retrieve the switchdev id from the firmware only once Avoid firmware command execution each time the switchdev HW ID attr get call is made. We do that by reading the ID (PF NIC MAC) only once at load time and store it on the representor structure. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 65258b2..03d944c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3387,6 +3387,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->set_rx_mode_work); if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); rep.load = mlx5e_nic_rep_load; rep.unload = mlx5e_nic_rep_unload; rep.vport = 0; @@ -3505,16 +3506,20 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) struct mlx5_eswitch *esw = mdev->priv.eswitch; int total_vfs = MLX5_TOTAL_VPORTS(mdev); int vport; + u8 mac[ETH_ALEN]; if (!MLX5_CAP_GEN(mdev, vport_group_manager)) return; + mlx5_query_nic_vport_mac_address(mdev, 0, mac); + for (vport = 1; vport < total_vfs; vport++) { struct mlx5_eswitch_rep rep; rep.load = mlx5e_vport_rep_load; rep.unload = mlx5e_vport_rep_unload; rep.vport = vport; + ether_addr_copy(rep.hw_id, mac); mlx5_eswitch_register_vport_rep(esw, &rep); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 1c7d8b8..134de4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -135,17 +135,16 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = { int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) { struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_eswitch_rep *rep = priv->ppriv; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - u8 mac[ETH_ALEN]; if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - mlx5_query_nic_vport_mac_address(priv->mdev, 0, mac); attr->u.ppid.id_len = ETH_ALEN; - memcpy(&attr->u.ppid.id, &mac, ETH_ALEN); + ether_addr_copy(attr->u.ppid.id, rep->hw_id); break; default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index c0b0560..a961409 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -174,6 +174,7 @@ struct mlx5_eswitch_rep { void *priv_data; struct list_head vport_sqs_list; bool valid; + u8 hw_id[ETH_ALEN]; }; struct mlx5_esw_offload { -- cgit v0.10.2 From ef78618b9d29d40c95ca7092493545b8487e226c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 18 Aug 2016 21:09:09 +0300 Subject: net/mlx5: E-Switch, Return the correct devlink e-switch mode Since mlx5 has also the NONE e-switch mode, we must translate from mlx5 mode to devlink mode on the devlink eswitch mode get call, do that. While here, remove the mlx5_ prefix from the static function helpers that deal with the mode to comply with the rest of the code. Fixes: c930a3ad7453 ('net/mlx5e: Add devlink based SRIOV mode change') Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a357e8e..1a3ccbf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -535,7 +535,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) esw_destroy_offloads_fdb_table(esw); } -static int mlx5_esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) +static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) { switch (mode) { case DEVLINK_ESWITCH_MODE_LEGACY: @@ -551,6 +551,22 @@ static int mlx5_esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) return 0; } +static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode) +{ + switch (mlx5_mode) { + case SRIOV_LEGACY: + *mode = DEVLINK_ESWITCH_MODE_LEGACY; + break; + case SRIOV_OFFLOADS: + *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; + break; + default: + return -EINVAL; + } + + return 0; +} + int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) { struct mlx5_core_dev *dev; @@ -566,7 +582,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) if (cur_mlx5_mode == SRIOV_NONE) return -EOPNOTSUPP; - if (mlx5_esw_mode_from_devlink(mode, &mlx5_mode)) + if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; if (cur_mlx5_mode == mlx5_mode) @@ -592,9 +608,7 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) if (dev->priv.eswitch->mode == SRIOV_NONE) return -EOPNOTSUPP; - *mode = dev->priv.eswitch->mode; - - return 0; + return esw_mode_to_devlink(dev->priv.eswitch->mode, mode); } void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, -- cgit v0.10.2 From 1a8ee6f25b10da7e3de0899a184b221d793f2482 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 18 Aug 2016 21:09:10 +0300 Subject: net/mlx5: E-Switch, Set the send-to-vport rules in the correct table While adding actual offloading support to the new switchdev mode, we didn't change the setup of the send-to-vport rules to put them in the slow path table, fix that. Fixes: 1033665e63b6 ('net/mlx5: E-Switch, Use two priorities for SRIOV offloads mode') Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1a3ccbf..3dc83a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -113,7 +113,7 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = vport; - flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, spec, + flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, &dest); if (IS_ERR(flow_rule)) -- cgit v0.10.2 From f96750f8d6bd64bb6f5a7a1146121b96bc6115f0 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 18 Aug 2016 21:09:11 +0300 Subject: net/mlx5: E-Switch, Avoid ACLs in the offloads mode When we are in the switchdev/offloads mode, HW matching is done as dictated by the offloaded rules and hence we don't need to enable the ACLs mechanism used by the legacy mode. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index f6d6677..8b78f15 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1451,7 +1451,8 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); - if (vport_num) { /* Only VFs need ACLs for VST and spoofchk filtering */ + /* Only VFs need ACLs for VST and spoofchk filtering */ + if (vport_num && esw->mode == SRIOV_LEGACY) { esw_vport_ingress_config(esw, vport); esw_vport_egress_config(esw, vport); } @@ -1502,7 +1503,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) */ esw_vport_change_handle_locked(vport); vport->enabled_events = 0; - if (vport_num) { + if (vport_num && esw->mode == SRIOV_LEGACY) { esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } @@ -1767,7 +1768,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, vport, err); mutex_lock(&esw->state_lock); - if (evport->enabled) + if (evport->enabled && esw->mode == SRIOV_LEGACY) err = esw_vport_ingress_config(esw, evport); mutex_unlock(&esw->state_lock); return err; @@ -1839,7 +1840,7 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, mutex_lock(&esw->state_lock); evport->vlan = vlan; evport->qos = qos; - if (evport->enabled) { + if (evport->enabled && esw->mode == SRIOV_LEGACY) { err = esw_vport_ingress_config(esw, evport); if (err) goto out; @@ -1868,10 +1869,11 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, mutex_lock(&esw->state_lock); pschk = evport->spoofchk; evport->spoofchk = spoofchk; - if (evport->enabled) + if (evport->enabled && esw->mode == SRIOV_LEGACY) { err = esw_vport_ingress_config(esw, evport); - if (err) - evport->spoofchk = pschk; + if (err) + evport->spoofchk = pschk; + } mutex_unlock(&esw->state_lock); return err; -- cgit v0.10.2 From 4c2f2454964477c66ef57745daab203b71783f66 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 18 Aug 2016 14:58:35 -0300 Subject: sctp: linearize early if it's not GSO Because otherwise when crc computation is still needed it's way more expensive than on a linear buffer to the point that it affects performance. It's so expensive that netperf test gives a perf output as below: Overhead Command Shared Object Symbol 18,62% netserver [kernel.vmlinux] [k] crc32_generic_shift 2,57% netserver [kernel.vmlinux] [k] __pskb_pull_tail 1,94% netserver [kernel.vmlinux] [k] fib_table_lookup 1,90% netserver [kernel.vmlinux] [k] copy_user_enhanced_fast_string 1,66% swapper [kernel.vmlinux] [k] intel_idle 1,63% netserver [kernel.vmlinux] [k] _raw_spin_lock 1,59% netserver [sctp] [k] sctp_packet_transmit 1,55% netserver [kernel.vmlinux] [k] memcpy_erms 1,42% netserver [sctp] [k] sctp_rcv # netperf -H 192.168.10.1 -l 10 -t SCTP_STREAM -cC -- -m 12000 SCTP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.10.1 () port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 212992 212992 12000 10.00 3016.42 2.88 3.78 1.874 2.462 After patch: Overhead Command Shared Object Symbol 2,75% netserver [kernel.vmlinux] [k] memcpy_erms 2,63% netserver [kernel.vmlinux] [k] copy_user_enhanced_fast_string 2,39% netserver [kernel.vmlinux] [k] fib_table_lookup 2,04% netserver [kernel.vmlinux] [k] __pskb_pull_tail 1,91% netserver [kernel.vmlinux] [k] _raw_spin_lock 1,91% netserver [sctp] [k] sctp_packet_transmit 1,72% netserver [mlx4_en] [k] mlx4_en_process_rx_cq 1,68% netserver [sctp] [k] sctp_rcv # netperf -H 192.168.10.1 -l 10 -t SCTP_STREAM -cC -- -m 12000 SCTP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.10.1 () port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 212992 212992 12000 10.00 3681.77 3.83 3.46 2.045 1.849 Fixes: 3acb50c18d8d ("sctp: delay as much as possible skb_linearize") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/net/sctp/input.c b/net/sctp/input.c index c182db7..69444d3 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -119,7 +119,13 @@ int sctp_rcv(struct sk_buff *skb) skb_transport_offset(skb)) goto discard_it; - if (!pskb_may_pull(skb, sizeof(struct sctphdr))) + /* If the packet is fragmented and we need to do crc checking, + * it's better to just linearize it otherwise crc computing + * takes longer. + */ + if ((!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) && + skb_linearize(skb)) || + !pskb_may_pull(skb, sizeof(struct sctphdr))) goto discard_it; /* Pull up the IP header. */ @@ -1177,9 +1183,6 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) return NULL; - if (skb_linearize(skb)) - return NULL; - ch = (sctp_chunkhdr_t *) skb->data; /* The code below will attempt to walk the chunk and extract diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index c30ddb0..6437aa9 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -170,19 +170,6 @@ next_chunk: chunk = list_entry(entry, struct sctp_chunk, list); - /* Linearize if it's not GSO */ - if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) != SKB_GSO_SCTP && - skb_is_nonlinear(chunk->skb)) { - if (skb_linearize(chunk->skb)) { - __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS); - sctp_chunk_free(chunk); - goto next_chunk; - } - - /* Update sctp_hdr as it probably changed */ - chunk->sctp_hdr = sctp_hdr(chunk->skb); - } - if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) { /* GSO-marked skbs but without frags, handle * them normally -- cgit v0.10.2 From 56cff471d0c62b721a298f806e7637501debb513 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 19 Aug 2016 13:36:23 +0800 Subject: l2tp: Fix the connect status check in pppol2tp_getname The sk->sk_state is bits flag, so need use bit operation check instead of value check. Signed-off-by: Gao Feng Tested-by: Guillaume Nault Signed-off-by: David S. Miller diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index d9560aa..232cb92 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -856,7 +856,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, error = -ENOTCONN; if (sk == NULL) goto end; - if (sk->sk_state != PPPOX_CONNECTED) + if (!(sk->sk_state & PPPOX_CONNECTED)) goto end; error = -EBADF; -- cgit v0.10.2 From 8912862f067276b480c4fed9da74c9c5601130a6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 19 Aug 2016 14:43:48 +0200 Subject: mlxsw: spectrum_buffers: Fix pool value handling in mlxsw_sp_sb_tc_pool_bind_set Pool index has to be converted by get_pool helper to work correctly for egress pool. In mlxsw the egress pool index starts from 0. Fixes: 0f433fa0ecc ("mlxsw: spectrum_buffers: Implement shared buffer configuration") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 237418a..953b214 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -717,22 +717,18 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, u8 local_port = mlxsw_sp_port->local_port; u8 pg_buff = tc_index; enum mlxsw_reg_sbxx_dir dir = pool_type; - u8 pool = pool_index; + u8 pool = pool_get(pool_index); u32 max_buff; int err; + if (dir != dir_get(pool_index)) + return -EINVAL; + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir, threshold, &max_buff); if (err) return err; - if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS) { - if (pool < MLXSW_SP_SB_POOL_COUNT) - return -EINVAL; - pool -= MLXSW_SP_SB_POOL_COUNT; - } else if (pool >= MLXSW_SP_SB_POOL_COUNT) { - return -EINVAL; - } return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, dir, 0, max_buff, pool); } -- cgit v0.10.2 From c10ac75aeed2d8486a73a316ac3a08f85d140894 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 19 Aug 2016 20:58:26 -0700 Subject: ixgbe: Do not clear RAR entry when clearing VMDq for SAN MAC The RAR entry for the SAN MAC address was being cleared when we were clearing the VMDq pool bits. In order to prevent this we need to add an extra check to protect the SAN MAC from being cleared. Fixes: 6e982aeae ("ixgbe: Clear stale pool mappings") Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index b4217f3..c47b605 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -2958,8 +2958,10 @@ s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) } /* was that the last pool using this rar? */ - if (mpsar_lo == 0 && mpsar_hi == 0 && rar != 0) + if (mpsar_lo == 0 && mpsar_hi == 0 && + rar != 0 && rar != hw->mac.san_mac_rar_index) hw->mac.ops.clear_rar(hw, rar); + return 0; } -- cgit v0.10.2 From ff2e7d5d51469e98196f7933c83b781e96517e7c Mon Sep 17 00:00:00 2001 From: Shrikrishna Khare Date: Fri, 19 Aug 2016 10:33:42 -0700 Subject: vmxnet3: fix tx data ring copy for variable size 'Commit 3c8b3efc061a ("vmxnet3: allow variable length transmit data ring buffer")' changed the size of the buffers in the tx data ring from a fixed size of 128 bytes to a variable size. However, while copying data to the data ring, vmxnet3_copy_hdr continues to carry the old code that assumes fixed buffer size of 128. This patch fixes it by adding correct offset based on the actual data ring buffer size. Signed-off-by: Guolin Yang Signed-off-by: Shrikrishna Khare Signed-off-by: David S. Miller diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index c68fe49..4244b9d 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -914,7 +914,9 @@ vmxnet3_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, { struct Vmxnet3_TxDataDesc *tdd; - tdd = tq->data_ring.base + tq->tx_ring.next2fill; + tdd = (struct Vmxnet3_TxDataDesc *)((u8 *)tq->data_ring.base + + tq->tx_ring.next2fill * + tq->txdata_desc_size); memcpy(tdd->data, skb->data, ctx->copy_size); netdev_dbg(adapter->netdev, diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 74fc030..7dc37a0 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,10 +69,10 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.4.9.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.4.a.0-k" /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01040900 +#define VMXNET3_DRIVER_VERSION_NUM 0x01040a00 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ -- cgit v0.10.2 From 5575cf133cf7f564da991595c6bc9344afa7d89a Mon Sep 17 00:00:00 2001 From: Daniel Romell Date: Fri, 19 Aug 2016 14:12:01 +0200 Subject: net: xilinx: emaclite: Fallback to random MAC address. If the address configured in the device tree is invalid, the driver will fallback to using a random address from the locally administered range. Signed-off-by: Daniel Romell Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 3cee84a..93dc10b 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -1131,11 +1131,13 @@ static int xemaclite_of_probe(struct platform_device *ofdev) lp->rx_ping_pong = get_bool(ofdev, "xlnx,rx-ping-pong"); mac_address = of_get_mac_address(ofdev->dev.of_node); - if (mac_address) + if (mac_address) { /* Set the MAC address. */ memcpy(ndev->dev_addr, mac_address, ETH_ALEN); - else - dev_warn(dev, "No MAC address found\n"); + } else { + dev_warn(dev, "No MAC address found, using random\n"); + eth_hw_addr_random(ndev); + } /* Clear the Tx CSR's in case this is a restart */ __raw_writel(0, lp->base_addr + XEL_TSR_OFFSET); -- cgit v0.10.2 From d524d84b588e300418a99794eb5066683ec7c488 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 21 Aug 2016 15:24:33 +0100 Subject: net: tehuti: fix typo: "eneble" -> "enable" trivial typo fix in pr_err message Signed-off-by: Colin Ian King Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index 7452b5f..7108c68 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -1987,7 +1987,7 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if ((readl(nic->regs + FPGA_VER) & 0xFFF) >= 378) { err = pci_enable_msi(pdev); if (err) - pr_err("Can't eneble msi. error is %d\n", err); + pr_err("Can't enable msi. error is %d\n", err); else nic->irq_type = IRQ_MSI; } else -- cgit v0.10.2 From b47b0cc73032d3c8225b5ea9a077941632f16d91 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:32:38 +0200 Subject: include/uapi/linux/if_pppox.h: include linux/if.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compilation error: error: ‘IFNAMSIZ’ undeclared here (not in a function) Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h index e128769..473c3c4 100644 --- a/include/uapi/linux/if_pppox.h +++ b/include/uapi/linux/if_pppox.h @@ -21,6 +21,7 @@ #include #include +#include #include #include -- cgit v0.10.2 From 1fe8e0f074c77aa41aaa579345a9e675acbebfa9 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:32:39 +0200 Subject: include/uapi/linux/if_tunnel.h: include linux/if.h, linux/ip.h and linux/in6.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compilation errors like: error: field ‘iph’ has incomplete type error: field ‘prefix’ has incomplete type Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 1046f55..777b6cd 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -2,6 +2,9 @@ #define _UAPI_IF_TUNNEL_H_ #include +#include +#include +#include #include -- cgit v0.10.2 From 05ee5de7451796cf9a8aeb2f05a57790d4fd2336 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:32:42 +0200 Subject: include/uapi/linux/if_pppol2tp.h: include linux/in.h and linux/in6.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compilation errors like: error: field ‘addr’ has incomplete type struct sockaddr_in addr; /* IP address and port to send to */ ^ error: field ‘addr’ has incomplete type struct sockaddr_in6 addr; /* IP address and port to send to */ Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/if_pppol2tp.h b/include/uapi/linux/if_pppol2tp.h index 163e8ad..4bd1f55 100644 --- a/include/uapi/linux/if_pppol2tp.h +++ b/include/uapi/linux/if_pppol2tp.h @@ -16,7 +16,8 @@ #define _UAPI__LINUX_IF_PPPOL2TP_H #include - +#include +#include /* Structure used to connect() the socket to a particular tunnel UDP * socket over IPv4. -- cgit v0.10.2 From eafe92114308acf14e45c6c3d154a5dad5523d1a Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:32:43 +0200 Subject: include/uapi/linux/if_pppox.h: include linux/in.h and linux/in6.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compilation errors: error: field ‘addr’ has incomplete type struct sockaddr_in addr; /* IP address and port to send to */ error: field ‘addr’ has incomplete type struct sockaddr_in6 addr; /* IP address and port to send to */ Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h index 473c3c4..d37bbb1 100644 --- a/include/uapi/linux/if_pppox.h +++ b/include/uapi/linux/if_pppox.h @@ -24,6 +24,8 @@ #include #include #include +#include +#include /* For user-space programs to pick up these definitions * which they wouldn't get otherwise without defining __KERNEL__ -- cgit v0.10.2 From e6571aa5cb65ff52a87843652d0d8120a48aae7c Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:32:55 +0200 Subject: include/uapi/linux/openvswitch.h: use __u32 from linux/types.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compiler error: error: unknown type name ‘uint32_t’ Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index d95a301..645499a 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -632,8 +632,8 @@ enum ovs_hash_alg { * @hash_basis: basis used for computing hash. */ struct ovs_action_hash { - uint32_t hash_alg; /* One of ovs_hash_alg. */ - uint32_t hash_basis; + __u32 hash_alg; /* One of ovs_hash_alg. */ + __u32 hash_basis; }; /** -- cgit v0.10.2 From cf00713a655d3019be7faa184402f16c43a0fed3 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:32:58 +0200 Subject: include/uapi/linux/atm_zatm.h: include linux/time.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compile error: error: field ‘real’ has incomplete type struct timeval real; /* real (wall-clock) time */ Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h index 9c9c6ad..5cd4d4d 100644 --- a/include/uapi/linux/atm_zatm.h +++ b/include/uapi/linux/atm_zatm.h @@ -14,6 +14,7 @@ #include #include +#include #define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc) /* get pool statistics */ -- cgit v0.10.2 From a1d1f65ff5ac27276a585b41a619d30995bb92fe Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:33:19 +0200 Subject: include/uapi/linux/openvswitch.h: use __u32 from linux/types.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kernel uapi header are supposed to use them. Fixes userspace compile error: linux/openvswitch.h:583:2: error: unknown type name ‘uint32_t’ Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 645499a..54c3b4f 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -583,7 +583,7 @@ enum ovs_userspace_attr { #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) struct ovs_action_trunc { - uint32_t max_len; /* Max packet size in bytes. */ + __u32 max_len; /* Max packet size in bytes. */ }; /** -- cgit v0.10.2 From 53dc65d4d33c422d086c9d9ad8c03ab400ffc0a1 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Mon, 22 Aug 2016 20:33:21 +0200 Subject: include/uapi/linux/ipx.h: fix conflicting defitions with glibc netipx/ipx.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes these compiler warnings via libc-compat.h when glibc netipx/ipx.h is included before linux/ipx.h: ./linux/ipx.h:9:8: error: redefinition of ‘struct sockaddr_ipx’ ./linux/ipx.h:26:8: error: redefinition of ‘struct ipx_route_definition’ ./linux/ipx.h:32:8: error: redefinition of ‘struct ipx_interface_definition’ ./linux/ipx.h:49:8: error: redefinition of ‘struct ipx_config_data’ ./linux/ipx.h:58:8: error: redefinition of ‘struct ipx_route_def’ Signed-off-by: Mikko Rapeli Signed-off-by: David S. Miller diff --git a/include/uapi/linux/ipx.h b/include/uapi/linux/ipx.h index 3d48014..30f031d 100644 --- a/include/uapi/linux/ipx.h +++ b/include/uapi/linux/ipx.h @@ -1,11 +1,13 @@ #ifndef _IPX_H_ #define _IPX_H_ +#include /* for compatibility with glibc netipx/ipx.h */ #include #include #include #define IPX_NODE_LEN 6 #define IPX_MTU 576 +#if __UAPI_DEF_SOCKADDR_IPX struct sockaddr_ipx { __kernel_sa_family_t sipx_family; __be16 sipx_port; @@ -14,6 +16,7 @@ struct sockaddr_ipx { __u8 sipx_type; unsigned char sipx_zero; /* 16 byte fill */ }; +#endif /* __UAPI_DEF_SOCKADDR_IPX */ /* * So we can fit the extra info for SIOCSIFADDR into the address nicely @@ -23,12 +26,15 @@ struct sockaddr_ipx { #define IPX_DLTITF 0 #define IPX_CRTITF 1 +#if __UAPI_DEF_IPX_ROUTE_DEFINITION struct ipx_route_definition { __be32 ipx_network; __be32 ipx_router_network; unsigned char ipx_router_node[IPX_NODE_LEN]; }; +#endif /* __UAPI_DEF_IPX_ROUTE_DEFINITION */ +#if __UAPI_DEF_IPX_INTERFACE_DEFINITION struct ipx_interface_definition { __be32 ipx_network; unsigned char ipx_device[16]; @@ -45,16 +51,20 @@ struct ipx_interface_definition { #define IPX_INTERNAL 2 unsigned char ipx_node[IPX_NODE_LEN]; }; - +#endif /* __UAPI_DEF_IPX_INTERFACE_DEFINITION */ + +#if __UAPI_DEF_IPX_CONFIG_DATA struct ipx_config_data { unsigned char ipxcfg_auto_select_primary; unsigned char ipxcfg_auto_create_interfaces; }; +#endif /* __UAPI_DEF_IPX_CONFIG_DATA */ /* * OLD Route Definition for backward compatibility. */ +#if __UAPI_DEF_IPX_ROUTE_DEF struct ipx_route_def { __be32 ipx_network; __be32 ipx_router_network; @@ -67,6 +77,7 @@ struct ipx_route_def { #define IPX_RT_BLUEBOOK 2 #define IPX_RT_ROUTED 1 }; +#endif /* __UAPI_DEF_IPX_ROUTE_DEF */ #define SIOCAIPXITFCRT (SIOCPROTOPRIVATE) #define SIOCAIPXPRISLT (SIOCPROTOPRIVATE + 1) diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index e4f048e..44b8a6b 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -139,6 +139,25 @@ #endif /* _NETINET_IN_H */ +/* Coordinate with glibc netipx/ipx.h header. */ +#if defined(__NETIPX_IPX_H) + +#define __UAPI_DEF_SOCKADDR_IPX 0 +#define __UAPI_DEF_IPX_ROUTE_DEFINITION 0 +#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 0 +#define __UAPI_DEF_IPX_CONFIG_DATA 0 +#define __UAPI_DEF_IPX_ROUTE_DEF 0 + +#else /* defined(__NETIPX_IPX_H) */ + +#define __UAPI_DEF_SOCKADDR_IPX 1 +#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 +#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 +#define __UAPI_DEF_IPX_CONFIG_DATA 1 +#define __UAPI_DEF_IPX_ROUTE_DEF 1 + +#endif /* defined(__NETIPX_IPX_H) */ + /* Definitions for xattr.h */ #if defined(_SYS_XATTR_H) #define __UAPI_DEF_XATTR 0 @@ -179,6 +198,13 @@ #define __UAPI_DEF_IN6_PKTINFO 1 #define __UAPI_DEF_IP6_MTUINFO 1 +/* Definitions for ipx.h */ +#define __UAPI_DEF_SOCKADDR_IPX 1 +#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 +#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 +#define __UAPI_DEF_IPX_CONFIG_DATA 1 +#define __UAPI_DEF_IPX_ROUTE_DEF 1 + /* Definitions for xattr.h */ #define __UAPI_DEF_XATTR 1 -- cgit v0.10.2 From 85b51b12115c79cce7ea1ced6c0bd0339a165d3f Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Thu, 18 Aug 2016 14:39:40 +0100 Subject: net: ipv6: Remove addresses for failures with strict DAD If DAD fails with accept_dad set to 2, global addresses and host routes are incorrectly left in place. Even though disable_ipv6 is set, contrary to documentation, the addresses are not dynamically deleted from the interface. It is only on a subsequent link down/up that these are removed. The fix is not only to set the disable_ipv6 flag, but also to call addrconf_ifdown(), which is the action to carry out when disabling IPv6. This results in the addresses and routes being deleted immediately. The DAD failure for the LL addr is determined as before via netlink, or by the absence of the LL addr (which also previously would have had to be checked for in case of an intervening link down and up). As the call to addrconf_ifdown() requires an rtnl lock, the logic to disable IPv6 when DAD fails is moved to addrconf_dad_work(). Previous behavior: root@vm1:/# sysctl net.ipv6.conf.eth3.accept_dad=2 net.ipv6.conf.eth3.accept_dad = 2 root@vm1:/# ip -6 addr add 2000::10/64 dev eth3 root@vm1:/# ip link set up eth3 root@vm1:/# ip -6 addr show dev eth3 5: eth3: mtu 1500 qlen 1000 inet6 2000::10/64 scope global valid_lft forever preferred_lft forever inet6 fe80::5054:ff:fe43:dd5a/64 scope link tentative dadfailed valid_lft forever preferred_lft forever root@vm1:/# ip -6 route show dev eth3 2000::/64 proto kernel metric 256 fe80::/64 proto kernel metric 256 root@vm1:/# ip link set down eth3 root@vm1:/# ip link set up eth3 root@vm1:/# ip -6 addr show dev eth3 root@vm1:/# ip -6 route show dev eth3 root@vm1:/# New behavior: root@vm1:/# sysctl net.ipv6.conf.eth3.accept_dad=2 net.ipv6.conf.eth3.accept_dad = 2 root@vm1:/# ip -6 addr add 2000::10/64 dev eth3 root@vm1:/# ip link set up eth3 root@vm1:/# ip -6 addr show dev eth3 root@vm1:/# ip -6 route show dev eth3 root@vm1:/# Signed-off-by: Mike Manning Signed-off-by: David S. Miller diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index df8425f..f418d2e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1872,7 +1872,6 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp) void addrconf_dad_failure(struct inet6_ifaddr *ifp) { - struct in6_addr addr; struct inet6_dev *idev = ifp->idev; struct net *net = dev_net(ifp->idev->dev); @@ -1934,18 +1933,6 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) in6_ifa_put(ifp2); lock_errdad: spin_lock_bh(&ifp->lock); - } else if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { - addr.s6_addr32[0] = htonl(0xfe800000); - addr.s6_addr32[1] = 0; - - if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) && - ipv6_addr_equal(&ifp->addr, &addr)) { - /* DAD failed for link-local based on MAC address */ - idev->cnf.disable_ipv6 = 1; - - pr_info("%s: IPv6 being disabled!\n", - ifp->idev->dev->name); - } } errdad: @@ -3821,6 +3808,7 @@ static void addrconf_dad_work(struct work_struct *w) dad_work); struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; + bool disable_ipv6 = false; enum { DAD_PROCESS, @@ -3837,6 +3825,24 @@ static void addrconf_dad_work(struct work_struct *w) } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) { action = DAD_ABORT; ifp->state = INET6_IFADDR_STATE_POSTDAD; + + if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6 && + !(ifp->flags & IFA_F_STABLE_PRIVACY)) { + struct in6_addr addr; + + addr.s6_addr32[0] = htonl(0xfe800000); + addr.s6_addr32[1] = 0; + + if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) && + ipv6_addr_equal(&ifp->addr, &addr)) { + /* DAD failed for link-local based on MAC */ + idev->cnf.disable_ipv6 = 1; + + pr_info("%s: IPv6 being disabled!\n", + ifp->idev->dev->name); + disable_ipv6 = true; + } + } } spin_unlock_bh(&ifp->lock); @@ -3845,6 +3851,8 @@ static void addrconf_dad_work(struct work_struct *w) goto out; } else if (action == DAD_ABORT) { addrconf_dad_stop(ifp, 1); + if (disable_ipv6) + addrconf_ifdown(idev->dev, 0); goto out; } -- cgit v0.10.2 From c0451fe1f27b815b3f400df2a63b9aecf589b7b0 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Sun, 21 Aug 2016 11:22:32 +0300 Subject: net: ip_finish_output_gso: Allow fragmenting segments of tunneled skbs if their DF is unset In b8247f095e, "net: ip_finish_output_gso: If skb_gso_network_seglen exceeds MTU, allow segmentation for local udp tunneled skbs" gso skbs arriving from an ingress interface that go through UDP tunneling, are allowed to be fragmented if the resulting encapulated segments exceed the dst mtu of the egress interface. This aligned the behavior of gso skbs to non-gso skbs going through udp encapsulation path. However the non-gso vs gso anomaly is present also in the following cases of a GRE tunnel: - ip_gre in collect_md mode, where TUNNEL_DONT_FRAGMENT is not set (e.g. OvS vport-gre with df_default=false) - ip_gre in nopmtudisc mode, where IFLA_GRE_IGNORE_DF is set In both of the above cases, the non-gso skbs get fragmented, whereas the gso skbs (having skb_gso_network_seglen that exceeds dst mtu) get dropped, as they don't go through the segment+fragment code path. Fix: Setting IPSKB_FRAG_SEGS if the tunnel specified IP_DF bit is NOT set. Tunnels that do set IP_DF, will not go to fragmentation of segments. This preserves behavior of ip_gre in (the default) pmtudisc mode. Fixes: b8247f095e ("net: ip_finish_output_gso: If skb_gso_network_seglen exceeds MTU, allow segmentation for local udp tunneled skbs") Reported-by: wenxu Cc: Hannes Frederic Sowa Signed-off-by: Shmulik Ladkani Tested-by: wenxu Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 9d847c3..0f227db 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -73,9 +73,11 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - if (skb_iif && proto == IPPROTO_UDP) { - /* Arrived from an ingress interface and got udp encapuslated. - * The encapsulated network segment length may exceed dst mtu. + if (skb_iif && !(df & htons(IP_DF))) { + /* Arrived from an ingress interface, got encapsulated, with + * fragmentation of encapulating frames allowed. + * If skb is gso, the resulting encapsulated network segments + * may exceed dst mtu. * Allow IP Fragmentation of segments. */ IPCB(skb)->flags |= IPSKB_FRAG_SEGS; -- cgit v0.10.2 From 4870e704d901602e4ae5de462c4e65732cf2ed6c Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 22 Aug 2016 12:03:29 +0300 Subject: qed: FLR of active VFs might lead to FW assert Driver never bothered marking the VF's vport with the VF's sw_fid. As a result, FLR flows are not going to clean those vports. If the vport was active when FLRed, re-activating it would lead to a FW assertion. Fixes: dacd88d6f6851 ("qed: IOV l2 functionality") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 35e5377..45ab746 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -561,9 +561,18 @@ struct qed_dev { static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev, u32 concrete_fid) { + u8 vfid = GET_FIELD(concrete_fid, PXP_CONCRETE_FID_VFID); u8 pfid = GET_FIELD(concrete_fid, PXP_CONCRETE_FID_PFID); + u8 vf_valid = GET_FIELD(concrete_fid, + PXP_CONCRETE_FID_VFVALID); + u8 sw_fid; - return pfid; + if (vf_valid) + sw_fid = vfid + MAX_NUM_PFS; + else + sw_fid = pfid; + + return sw_fid; } #define PURE_LB_TC 8 -- cgit v0.10.2 From 28a10c426e81afc88514bca8e73affccf850fdf6 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 22 Aug 2016 07:10:20 -0400 Subject: net sched: fix encoding to use real length Encoding of the metadata was using the padded length as opposed to the real length of the data which is a bug per specification. This has not been an issue todate because all metadatum specified so far has been 32 bit where aligned and data length are the same width. This also includes a bug fix for validating the length of a u16 field. But since there is no metadata of size u16 yes we are fine to include it here. While at it get rid of magic numbers. Fixes: ef6980b6becb ("net sched: introduce IFE action") Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 141a06e..e87cd81 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -53,7 +53,7 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) u32 *tlv = (u32 *)(skbdata); u16 totlen = nla_total_size(dlen); /*alignment + hdr */ char *dptr = (char *)tlv + NLA_HDRLEN; - u32 htlv = attrtype << 16 | totlen; + u32 htlv = attrtype << 16 | dlen; *tlv = htonl(htlv); memset(dptr, 0, totlen - NLA_HDRLEN); @@ -135,7 +135,7 @@ EXPORT_SYMBOL_GPL(ife_release_meta_gen); int ife_validate_meta_u32(void *val, int len) { - if (len == 4) + if (len == sizeof(u32)) return 0; return -EINVAL; @@ -144,8 +144,8 @@ EXPORT_SYMBOL_GPL(ife_validate_meta_u32); int ife_validate_meta_u16(void *val, int len) { - /* length will include padding */ - if (len == NLA_ALIGN(2)) + /* length will not include padding */ + if (len == sizeof(u16)) return 0; return -EINVAL; @@ -652,12 +652,14 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, u8 *tlvdata = (u8 *)tlv; u16 mtype = tlv->type; u16 mlen = tlv->len; + u16 alen; mtype = ntohs(mtype); mlen = ntohs(mlen); + alen = NLA_ALIGN(mlen); - if (find_decode_metaid(skb, ife, mtype, (mlen - 4), - (void *)(tlvdata + 4))) { + if (find_decode_metaid(skb, ife, mtype, (mlen - NLA_HDRLEN), + (void *)(tlvdata + NLA_HDRLEN))) { /* abuse overlimits to count when we receive metadata * but dont have an ops for it */ @@ -666,8 +668,8 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, ife->tcf_qstats.overlimits++; } - tlvdata += mlen; - ifehdrln -= mlen; + tlvdata += alen; + ifehdrln -= alen; tlv = (struct meta_tlvhdr *)tlvdata; } -- cgit v0.10.2 From e83c6744e81abc93a20d0eb3b7f504a176a6126a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Aug 2016 13:59:33 -0700 Subject: udp: fix poll() issue with zero sized packets Laura tracked poll() [and friends] regression caused by commit e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") udp_poll() needs to know if there is a valid packet in receive queue, even if its payload length is 0. Change first_packet_length() to return an signed int, and use -1 as the indication of an empty queue. Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Reported-by: Laura Abbott Signed-off-by: Eric Dumazet Tested-by: Laura Abbott Signed-off-by: David S. Miller diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e61f7cd..00d18c5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1182,13 +1182,13 @@ out: * @sk: socket * * Drops all bad checksum frames, until a valid one is found. - * Returns the length of found skb, or 0 if none is found. + * Returns the length of found skb, or -1 if none is found. */ -static unsigned int first_packet_length(struct sock *sk) +static int first_packet_length(struct sock *sk) { struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; struct sk_buff *skb; - unsigned int res; + int res; __skb_queue_head_init(&list_kill); @@ -1203,7 +1203,7 @@ static unsigned int first_packet_length(struct sock *sk) __skb_unlink(skb, rcvq); __skb_queue_tail(&list_kill, skb); } - res = skb ? skb->len : 0; + res = skb ? skb->len : -1; spin_unlock_bh(&rcvq->lock); if (!skb_queue_empty(&list_kill)) { @@ -1232,7 +1232,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) case SIOCINQ: { - unsigned int amount = first_packet_length(sk); + int amount = max_t(int, 0, first_packet_length(sk)); return put_user(amount, (int __user *)arg); } @@ -2184,7 +2184,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Check for false positives due to checksum errors */ if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && - !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) + !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) mask &= ~(POLLIN | POLLRDNORM); return mask; -- cgit v0.10.2 From b323431bc017e9862870cbbac004774c769ee112 Mon Sep 17 00:00:00 2001 From: Zefir Kurtisi Date: Mon, 22 Aug 2016 15:56:38 +0200 Subject: gianfar: prevent fragmentation in DSA environments The eTSEC register MRBLR defines the maximum space in the RX buffers and is set to 1536 by gianfar. This reasonably covers the common use case where the MTU is kept at default 1500. In that case, the largest Ethernet frame size of 1518 plus an optional GMAC_FCB_LEN of 8, and an additional padding of 8 to handle FSL_GIANFAR_DEV_HAS_TIMER totals to 1534 and nicely fit within the chosen MRBLR. Alas, if the eTSEC is attached to a DSA enabled switch, the (E)DSA header extension (4 or 8 bytes) causes every maximum sized frame to be fragmented by the hardware. This patch increases the maximum RX buffer size by 8 and rounds up to the next multiple of 64, which the hardware's defines as RX buffer granularity. Signed-off-by: Zefir Kurtisi Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h index 373fd09..6e8a9c8 100644 --- a/drivers/net/ethernet/freescale/gianfar.h +++ b/drivers/net/ethernet/freescale/gianfar.h @@ -100,7 +100,8 @@ extern const char gfar_driver_version[]; #define DEFAULT_RX_LFC_THR 16 #define DEFAULT_LFC_PTVVAL 4 -#define GFAR_RXB_SIZE 1536 +/* prevent fragmenation by HW in DSA environments */ +#define GFAR_RXB_SIZE roundup(1536 + 8, 64) #define GFAR_SKBFRAG_SIZE (RXBUF_ALIGNMENT + GFAR_RXB_SIZE \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define GFAR_RXB_TRUESIZE 2048 -- cgit v0.10.2 From 6c389fc931bcda88940c809f752ada6d7799482c Mon Sep 17 00:00:00 2001 From: Zefir Kurtisi Date: Mon, 22 Aug 2016 15:58:12 +0200 Subject: gianfar: fix size of scatter-gathered frames The current scatter-gather logic in gianfar is flawed, since it does not consider the eTSEC's RxBD 'Data Length' field is context depening: for the last fragment it contains the full frame size, while fragments contain the fragment size, which equals the value written to register MRBLR. This causes data corruption as soon as the hardware starts to fragment receiving frames. As a result, the size of fragmented frames is increased by (nr_frags - 1) * MRBLR We first noticed this issue working with DSA, where an ICMP request sized 1472 bytes causes the scatter-gather logic to kick in. The full Ethernet frame (1518) gets increased by DSA (4), GMAC_FCB_LEN (8), and FSL_GIANFAR_DEV_HAS_TIMER (priv->padding=8) to a total of 1538 octets, which is fragmented by the hardware and reconstructed by the driver to a 3074 octet frame. This patch fixes the problem by adjusting the size of the last fragment. It was tested by setting MRBLR to different multiples of 64, proving correct scatter-gather operation on frames with up to 9000 octets in size. Signed-off-by: Zefir Kurtisi Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index d20935d..4b4f5bc 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2922,17 +2922,25 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus, { unsigned int size = lstatus & BD_LENGTH_MASK; struct page *page = rxb->page; + bool last = !!(lstatus & BD_LFLAG(RXBD_LAST)); /* Remove the FCS from the packet length */ - if (likely(lstatus & BD_LFLAG(RXBD_LAST))) + if (last) size -= ETH_FCS_LEN; - if (likely(first)) + if (likely(first)) { skb_put(skb, size); - else - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rxb->page_offset + RXBUF_ALIGNMENT, - size, GFAR_RXB_TRUESIZE); + } else { + /* the last fragments' length contains the full frame length */ + if (last) + size -= skb->len; + + /* in case the last fragment consisted only of the FCS */ + if (size > 0) + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rxb->page_offset + RXBUF_ALIGNMENT, + size, GFAR_RXB_TRUESIZE); + } /* try reuse page */ if (unlikely(page_count(page) != 1)) -- cgit v0.10.2 From 20a2b49fc538540819a0c552877086548cff8d8d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Aug 2016 11:31:10 -0700 Subject: tcp: properly scale window in tcp_v[46]_reqsk_send_ack() When sending an ack in SYN_RECV state, we must scale the offered window if wscale option was negotiated and accepted. Tested: Following packetdrill test demonstrates the issue : 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 // Establish a connection. +0 < S 0:0(0) win 20000 +0 > S. 0:0(0) ack 1 win 28960 +0 < . 1:11(10) ack 1 win 156 // check that window is properly scaled ! +0 > . 1:1(0) ack 1 win 226 Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 32b048e..7158d4f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -814,8 +814,14 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt; + /* RFC 7323 2.3 + * The window field (SEG.WND) of every outgoing segment, with the + * exception of segments, MUST be right-shifted by + * Rcv.Wind.Shift bits: + */ tcp_v4_send_ack(sock_net(sk), skb, seq, - tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, + tcp_rsk(req)->rcv_nxt, + req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp, req->ts_recent, 0, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 33df8b8..94f4f89 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -944,9 +944,15 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. */ + /* RFC 7323 2.3 + * The window field (SEG.WND) of every outgoing segment, with the + * exception of segments, MUST be right-shifted by + * Rcv.Wind.Shift bits: + */ tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, - tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, + tcp_rsk(req)->rcv_nxt, + req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); -- cgit v0.10.2 From f64f14820e2deb5db056a05d7672ee2b1c6290e5 Mon Sep 17 00:00:00 2001 From: Xander Huff Date: Mon, 22 Aug 2016 15:57:16 -0500 Subject: phy: micrel: Reenable interrupts during resume for ksz9031 Like the ksz8081, the ksz9031 has the behavior where it will clear the interrupt enable bits when leaving power down. This takes advantage of the solution provided by f5aba91. Signed-off-by: Xander Huff Signed-off-by: Nathan Sullivan Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 053e879..885ac9c 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -964,7 +964,7 @@ static struct phy_driver ksphy_driver[] = { .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, .suspend = genphy_suspend, - .resume = genphy_resume, + .resume = kszphy_resume, }, { .phy_id = PHY_ID_KSZ8873MLL, .phy_id_mask = MICREL_PHY_ID_MASK, -- cgit v0.10.2 From 1bc261fabe866c4cdc97f52319eaa0c7ee31026e Mon Sep 17 00:00:00 2001 From: Jamie Lentin Date: Mon, 22 Aug 2016 22:47:08 +0100 Subject: net: mv88e6xxx: Fix ingress rate removal for mv6131 chips The PORT_RATE_CONTROL register works differently on 88e6095/6095f/6131 in comparison to 6123/61/65, and 0x0 disables. The distinction was lost Linux 4.1 --> 4.2 Signed-off-by: Jamie Lentin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d1d9d3c..7106790 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2656,15 +2656,19 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) return ret; } + /* Rate Control: disable ingress rate limiting. */ if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || - mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip) || mv88e6xxx_6320_family(chip)) { - /* Rate Control: disable ingress rate limiting. */ ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_RATE_CONTROL, 0x0001); if (ret) return ret; + } else if (mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip)) { + ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), + PORT_RATE_CONTROL, 0x0000); + if (ret) + return ret; } /* Port Control 1: disable trunking, disable sending -- cgit v0.10.2 From 53080fe9c451e7625e71b91c384e7bef1be72b00 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 23 Aug 2016 09:48:20 -0300 Subject: net: lpc_eth: Check clk_prepare_enable() error clk_prepare_enable() may fail, so we should better check its return value and propagate it in the case of failure While at it, replace __lpc_eth_clock_enable() with a plain clk_prepare_enable/clk_disable_unprepare() call in order to simplify the code. Signed-off-by: Fabio Estevam Acked-by: Vladimir Zapolskiy Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 4d4ecba..8e13ec8 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -475,14 +475,6 @@ static void __lpc_get_mac(struct netdata_local *pldat, u8 *mac) mac[5] = tmp >> 8; } -static void __lpc_eth_clock_enable(struct netdata_local *pldat, bool enable) -{ - if (enable) - clk_prepare_enable(pldat->clk); - else - clk_disable_unprepare(pldat->clk); -} - static void __lpc_params_setup(struct netdata_local *pldat) { u32 tmp; @@ -1056,7 +1048,7 @@ static int lpc_eth_close(struct net_device *ndev) writel(0, LPC_ENET_MAC2(pldat->net_base)); spin_unlock_irqrestore(&pldat->lock, flags); - __lpc_eth_clock_enable(pldat, false); + clk_disable_unprepare(pldat->clk); return 0; } @@ -1197,11 +1189,14 @@ static int lpc_eth_ioctl(struct net_device *ndev, struct ifreq *req, int cmd) static int lpc_eth_open(struct net_device *ndev) { struct netdata_local *pldat = netdev_priv(ndev); + int ret; if (netif_msg_ifup(pldat)) dev_dbg(&pldat->pdev->dev, "enabling %s\n", ndev->name); - __lpc_eth_clock_enable(pldat, true); + ret = clk_prepare_enable(pldat->clk); + if (ret) + return ret; /* Suspended PHY makes LPC ethernet core block, so resume now */ phy_resume(ndev->phydev); @@ -1320,7 +1315,9 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) } /* Enable network clock */ - __lpc_eth_clock_enable(pldat, true); + ret = clk_prepare_enable(pldat->clk); + if (ret) + goto err_out_clk_put; /* Map IO space */ pldat->net_base = ioremap(res->start, resource_size(res)); @@ -1454,6 +1451,7 @@ err_out_iounmap: iounmap(pldat->net_base); err_out_disable_clocks: clk_disable_unprepare(pldat->clk); +err_out_clk_put: clk_put(pldat->clk); err_out_free_dev: free_netdev(ndev); -- cgit v0.10.2 From a8184003c0bb1d6362c2af76c560b3caae6832cb Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 23 Aug 2016 16:31:28 +0200 Subject: dwc_eth_qos: fix interrupt enable race We currently enable interrupts before we enable NAPI. If an RX interrupt hits before we enabled NAPI then the NAPI callback is never called and we leave the hardware with RX interrupts disabled, which of course leads us to never handling received packets. Fix this by moving the interrupt enable to after we've enable NAPI and the reclaim tasklet. Fixes: cd5e41234729 ("dwc_eth_qos: do phy_start before resetting hardware") Signed-off-by: Rabin Vincent Signed-off-by: Lars Persson Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index 9f159a7..5a3941b 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -1622,13 +1622,7 @@ static void dwceqos_init_hw(struct net_local *lp) DWCEQOS_MMC_CTRL_RSTONRD); dwceqos_enable_mmc_interrupt(lp); - /* Enable Interrupts */ - dwceqos_write(lp, REG_DWCEQOS_DMA_CH0_IE, - DWCEQOS_DMA_CH0_IE_NIE | - DWCEQOS_DMA_CH0_IE_RIE | DWCEQOS_DMA_CH0_IE_TIE | - DWCEQOS_DMA_CH0_IE_AIE | - DWCEQOS_DMA_CH0_IE_FBEE); - + dwceqos_write(lp, REG_DWCEQOS_DMA_CH0_IE, 0); dwceqos_write(lp, REG_DWCEQOS_MAC_IE, 0); dwceqos_write(lp, REG_DWCEQOS_MAC_CFG, DWCEQOS_MAC_CFG_IPC | @@ -1905,6 +1899,15 @@ static int dwceqos_open(struct net_device *ndev) netif_start_queue(ndev); tasklet_enable(&lp->tx_bdreclaim_tasklet); + /* Enable Interrupts -- do this only after we enable NAPI and the + * tasklet. + */ + dwceqos_write(lp, REG_DWCEQOS_DMA_CH0_IE, + DWCEQOS_DMA_CH0_IE_NIE | + DWCEQOS_DMA_CH0_IE_RIE | DWCEQOS_DMA_CH0_IE_TIE | + DWCEQOS_DMA_CH0_IE_AIE | + DWCEQOS_DMA_CH0_IE_FBEE); + return 0; } -- cgit v0.10.2 From 232cb53a45965f8789fbf0a9a1962f8c67ab1a3c Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Tue, 23 Aug 2016 11:40:52 -0400 Subject: sctp: fix overrun in sctp_diag_dump_one() The function sctp_diag_dump_one() currently performs a memcpy() of 64 bytes from a 16 byte field into another 16 byte field. Fix by using correct size, use sizeof to obtain correct size instead of using a hard-coded constant. Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Signed-off-by: Lance Richardson Reviewed-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index bb69153..f3508aa 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -424,11 +424,13 @@ static int sctp_diag_dump_one(struct sk_buff *in_skb, paddr.v4.sin_family = AF_INET; } else { laddr.v6.sin6_port = req->id.idiag_sport; - memcpy(&laddr.v6.sin6_addr, req->id.idiag_src, 64); + memcpy(&laddr.v6.sin6_addr, req->id.idiag_src, + sizeof(laddr.v6.sin6_addr)); laddr.v6.sin6_family = AF_INET6; paddr.v6.sin6_port = req->id.idiag_dport; - memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst, 64); + memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst, + sizeof(paddr.v6.sin6_addr)); paddr.v6.sin6_family = AF_INET6; } -- cgit v0.10.2 From 75d855a5e93e6f3d9b37a8719d69a5318f051453 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Aug 2016 09:57:51 -0700 Subject: udp: get rid of SLAB_DESTROY_BY_RCU allocations After commit ca065d0cf80f ("udp: no longer use SLAB_DESTROY_BY_RCU") we do not need this special allocation mode anymore, even if it is harmless. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 00d18c5..5fdcb8d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2216,7 +2216,6 @@ struct proto udp_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 3b3efbd..2eea073 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -55,7 +55,6 @@ struct proto udplite_prot = { .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, .obj_size = sizeof(struct udp_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 81e2f98..19ac3a1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1460,7 +1460,6 @@ struct proto udpv6_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp6_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 9cf097e..fd6ef41 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -50,7 +50,6 @@ struct proto udplitev6_prot = { .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, .obj_size = sizeof(struct udp6_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, -- cgit v0.10.2 From 7b996243fab46092fb3a29c773c54be8152366e4 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Tue, 23 Aug 2016 18:22:33 -0400 Subject: tun: fix transmit timestamp support Instead of using sock_tx_timestamp, use skb_tx_timestamp to record software transmit timestamp of a packet. sock_tx_timestamp resets and overrides the tx_flags of the skb. The function is intended to be called from within the protocol layer when creating the skb, not from a device driver. This is inconsistent with other drivers and will cause issues for TCP. In TCP, we intend to sample the timestamps for the last byte for each sendmsg/sendpage. For that reason, tcp_sendmsg calls tcp_tx_timestamp only with the last skb that it generates. For example, if a 128KB message is split into two 64KB packets we want to sample the SND timestamp of the last packet. The current code in the tun driver, however, will result in sampling the SND timestamp for both packets. Also, when the last packet is split into smaller packets for retranmission (see tcp_fragment), the tun driver will record timestamps for all of the retransmitted packets and not only the last packet. Fixes: eda297729171 (tun: Support software transmit time stamping.) Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Francis Yan Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9c8b5bc..6f9df37 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -894,11 +894,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) goto drop; - if (skb->sk && sk_fullsock(skb->sk)) { - sock_tx_timestamp(skb->sk, skb->sk->sk_tsflags, - &skb_shinfo(skb)->tx_flags); - sw_tx_timestamp(skb); - } + skb_tx_timestamp(skb); /* Orphan the skb - required as we might hang on to it * for indefinite time. -- cgit v0.10.2 From d7226c7a4dd19929d6df4ae04698da2fcf6f875a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 23 Aug 2016 21:05:27 -0700 Subject: net: diag: Fix refcnt leak in error path destroying socket inet_diag_find_one_icsk takes a reference to a socket that is not released if sock_diag_destroy returns an error. Fix by changing tcp_diag_destroy to manage the refcnt for all cases and remove the sock_put calls from tcp_abort. Fixes: c1e64e298b8ca ("net: diag: Support destroying TCP sockets") Reported-by: Lorenzo Colitti Signed-off-by: David Ahern Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 032a96d..ffbb218 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3193,7 +3193,6 @@ int tcp_abort(struct sock *sk, int err) local_bh_enable(); return 0; } - sock_gen_put(sk); return -EOPNOTSUPP; } @@ -3222,7 +3221,6 @@ int tcp_abort(struct sock *sk, int err) bh_unlock_sock(sk); local_bh_enable(); release_sock(sk); - sock_put(sk); return 0; } EXPORT_SYMBOL_GPL(tcp_abort); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 4d61093..a748c74 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -54,11 +54,16 @@ static int tcp_diag_destroy(struct sk_buff *in_skb, { struct net *net = sock_net(in_skb->sk); struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req); + int err; if (IS_ERR(sk)) return PTR_ERR(sk); - return sock_diag_destroy(sk, ECONNABORTED); + err = sock_diag_destroy(sk, ECONNABORTED); + + sock_gen_put(sk); + + return err; } #endif -- cgit v0.10.2 From 9afee94939e3eda4c8bf239f7727cb56e158c976 Mon Sep 17 00:00:00 2001 From: Frederic Dalleau Date: Tue, 23 Aug 2016 07:59:19 +0200 Subject: Bluetooth: Fix memory leak at end of hci requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In hci_req_sync_complete the event skb is referenced in hdev->req_skb. It is used (via hci_req_run_skb) from either __hci_cmd_sync_ev which will pass the skb to the caller, or __hci_req_sync which leaks. unreferenced object 0xffff880005339a00 (size 256): comm "kworker/u3:1", pid 1011, jiffies 4294671976 (age 107.389s) backtrace: [] kmemleak_alloc+0x49/0xa0 [] kmem_cache_alloc+0x128/0x180 [] skb_clone+0x4f/0xa0 [] hci_event_packet+0xc1/0x3290 [] hci_rx_work+0x18b/0x360 [] process_one_work+0x14a/0x440 [] worker_thread+0x43/0x4d0 [] kthread+0xc4/0xe0 [] ret_from_fork+0x1f/0x40 [] 0xffffffffffffffff Signed-off-by: Frédéric Dalleau Signed-off-by: Marcel Holtmann diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index c045b3c..b0e23df 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -262,6 +262,8 @@ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, break; } + kfree_skb(hdev->req_skb); + hdev->req_skb = NULL; hdev->req_status = hdev->req_result = 0; BT_DBG("%s end: err %d", hdev->name, err); -- cgit v0.10.2 From dbb50887c8f619fc5c3489783ebc3122bc134a31 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 27 Jul 2016 11:40:14 -0700 Subject: Bluetooth: split sk_filter in l2cap_sock_recv_cb During an audit for sk_filter(), we found that rx_busy_skb handling in l2cap_sock_recv_cb() and l2cap_sock_recvmsg() looks not quite as intended. The assumption from commit e328140fdacb ("Bluetooth: Use event-driven approach for handling ERTM receive buffer") is that errors returned from sock_queue_rcv_skb() are due to receive buffer shortage. However, nothing should prevent doing a setsockopt() with SO_ATTACH_FILTER on the socket, that could drop some of the incoming skbs when handled in sock_queue_rcv_skb(). In that case sock_queue_rcv_skb() will return with -EPERM, propagated from sk_filter() and if in L2CAP_MODE_ERTM mode, wrong assumption was that we failed due to receive buffer being full. From that point onwards, due to the to-be-dropped skb being held in rx_busy_skb, we cannot make any forward progress as rx_busy_skb is never cleared from l2cap_sock_recvmsg(), due to the filter drop verdict over and over coming from sk_filter(). Meanwhile, in l2cap_sock_recv_cb() all new incoming skbs are being dropped due to rx_busy_skb being occupied. Instead, just use __sock_queue_rcv_skb() where an error really tells that there's a receive buffer issue. Split the sk_filter() and enable it for non-segmented modes at queuing time since at this point in time the skb has already been through the ERTM state machine and it has been acked, so dropping is not allowed. Instead, for ERTM and streaming mode, call sk_filter() in l2cap_data_rcv() so the packet can be dropped before the state machine sees it. Fixes: e328140fdacb ("Bluetooth: Use event-driven approach for handling ERTM receive buffer") Signed-off-by: Daniel Borkmann Signed-off-by: Mat Martineau Acked-by: Willem de Bruijn Signed-off-by: Marcel Holtmann diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 54ceb1f..d4cad29b0 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -5835,6 +5836,9 @@ static int l2cap_reassemble_sdu(struct l2cap_chan *chan, struct sk_buff *skb, if (chan->sdu) break; + if (!pskb_may_pull(skb, L2CAP_SDULEN_SIZE)) + break; + chan->sdu_len = get_unaligned_le16(skb->data); skb_pull(skb, L2CAP_SDULEN_SIZE); @@ -6610,6 +6614,10 @@ static int l2cap_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) goto drop; } + if ((chan->mode == L2CAP_MODE_ERTM || + chan->mode == L2CAP_MODE_STREAMING) && sk_filter(chan->data, skb)) + goto drop; + if (!control->sframe) { int err; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 1842141..a8ba752 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1019,7 +1019,7 @@ static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg, goto done; if (pi->rx_busy_skb) { - if (!sock_queue_rcv_skb(sk, pi->rx_busy_skb)) + if (!__sock_queue_rcv_skb(sk, pi->rx_busy_skb)) pi->rx_busy_skb = NULL; else goto done; @@ -1270,7 +1270,17 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) goto done; } - err = sock_queue_rcv_skb(sk, skb); + if (chan->mode != L2CAP_MODE_ERTM && + chan->mode != L2CAP_MODE_STREAMING) { + /* Even if no filter is attached, we could potentially + * get errors from security modules, etc. + */ + err = sk_filter(sk, skb); + if (err) + goto done; + } + + err = __sock_queue_rcv_skb(sk, skb); /* For ERTM, handle one skb that doesn't fit into the recv * buffer. This is important to do because the data frames -- cgit v0.10.2 From f888f58795b640442165e60a6fa93e8e623d01a5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Aug 2016 11:18:51 +0200 Subject: mlxsw: spectrum: Add missing flood to router port In case we have a layer 3 interface on top of a bridge (VLAN / FID RIF), then we should flood the following packet types to the router: * Broadcast: If DIP is the broadcast address of the interface, then we need to be able to get it to CPU by trapping it following route lookup. * Reserved IP multicast (224.0.0.X): Some control packets (e.g. OSPF) use this range and are trapped in the router block. Fixes: 99f44bb3527b ("mlxsw: spectrum: Enable L3 interfaces on top of bridge devices") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h index f33b997..af371a8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/port.h +++ b/drivers/net/ethernet/mellanox/mlxsw/port.h @@ -56,6 +56,7 @@ #define MLXSW_PORT_PHY_BITS_MASK (MLXSW_PORT_MAX_PHY_PORTS - 1) #define MLXSW_PORT_CPU_PORT 0x0 +#define MLXSW_PORT_ROUTER_PORT (MLXSW_PORT_MAX_PHY_PORTS + 2) #define MLXSW_PORT_DONT_CARE (MLXSW_PORT_MAX_PORTS) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 1f81689..7291f2c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3324,6 +3324,39 @@ static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fid_find(mlxsw_sp, fid); } +static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid) +{ + return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID : + MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; +} + +static u16 mlxsw_sp_flood_table_index_get(u16 fid) +{ + return mlxsw_sp_fid_is_vfid(fid) ? mlxsw_sp_fid_to_vfid(fid) : fid; +} + +static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid, + bool set) +{ + enum mlxsw_flood_table_type table_type; + char *sftr_pl; + u16 index; + int err; + + sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); + if (!sftr_pl) + return -ENOMEM; + + table_type = mlxsw_sp_flood_table_type_get(fid); + index = mlxsw_sp_flood_table_index_get(fid); + mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, index, table_type, + 1, MLXSW_PORT_ROUTER_PORT, set); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); + + kfree(sftr_pl); + return err; +} + static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid) { if (mlxsw_sp_fid_is_vfid(fid)) @@ -3360,10 +3393,14 @@ static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, if (rif == MLXSW_SP_RIF_MAX) return -ERANGE; - err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true); + err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true); if (err) return err; + err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true); + if (err) + goto err_rif_bridge_op; + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true); if (err) goto err_rif_fdb_op; @@ -3385,6 +3422,8 @@ err_rif_alloc: mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); err_rif_fdb_op: mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false); +err_rif_bridge_op: + mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); return err; } @@ -3404,6 +3443,8 @@ void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false); + mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); + netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif); } -- cgit v0.10.2 From 51af96b53469f3b8cfcfe0504d0ff87239175b78 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 24 Aug 2016 11:18:52 +0200 Subject: mlxsw: router: Enable neighbors to be created on stacked devices Make the function mlxsw_router_neigh_construct search the rif according to the neighbour dev other than the dev that was passed to the ndo, thus allowing creating neigbhours upon stacked devices. Fixes: 6cf3c971dc84 ("mlxsw: spectrum_router: Add private neigh table") Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 90bb93b..917ddd1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -657,7 +657,7 @@ int mlxsw_sp_router_neigh_construct(struct net_device *dev, return 0; } - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev); if (WARN_ON(!r)) return -EINVAL; -- cgit v0.10.2 From 90a56f72edb088c678083c32d05936c7c8d9a948 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 12 Aug 2016 15:11:28 +0300 Subject: Bluetooth: Fix bt_sock_recvmsg when MSG_TRUNC is not set Commit b5f34f9420b50c9b5876b9a2b68e96be6d629054 attempt to introduce proper handling for MSG_TRUNC but recv and variants should still work as read if no flag is passed, but because the code may set MSG_TRUNC to msg->msg_flags that shall not be used as it may cause it to be behave as if MSG_TRUNC is always, so instead of using it this changes the code to use the flags parameter which shall contain the original flags. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index ece45e0..0b5f729 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -250,7 +250,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_free_datagram(sk, skb); - if (msg->msg_flags & MSG_TRUNC) + if (flags & MSG_TRUNC) copied = skblen; return err ? : copied; -- cgit v0.10.2 From 4f34228b67246ae3b3ab1dc33b980c77c0650ef4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 15 Aug 2016 16:02:20 +0300 Subject: Bluetooth: Fix hci_sock_recvmsg when MSG_TRUNC is not set Similar to bt_sock_recvmsg MSG_TRUNC shall be checked using the original flags not msg_flags. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 6ef8a01..96f04b7 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1091,7 +1091,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, skb_free_datagram(sk, skb); - if (msg->msg_flags & MSG_TRUNC) + if (flags & MSG_TRUNC) copied = skblen; return err ? : copied; -- cgit v0.10.2 From a5de125dd46c851fc962806135953c1bd0a0f0df Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 24 Aug 2016 13:32:19 +0000 Subject: tipc: fix the error handling in tipc_udp_enable() Fix to return a negative error code in enable_mcast() error handling case, and release udp socket when necessary. Fixes: d0f91938bede ("tipc: add ip/udp media type") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index b016c01..ae7e14c 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -396,10 +396,13 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, tuncfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); - if (enable_mcast(ub, remote)) + err = enable_mcast(ub, remote); + if (err) goto err; return 0; err: + if (ub->ubsock) + udp_tunnel_sock_release(ub->ubsock); kfree(ub); return err; } -- cgit v0.10.2 From 166ee5b87866de07a3e56c1b757f2b5cabba72a5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Aug 2016 09:39:02 -0700 Subject: qdisc: fix a module refcount leak in qdisc_create_dflt() Should qdisc_alloc() fail, we must release the module refcount we got right before. Fixes: 6da7c8fcbcbd ("qdisc: allow setting default queuing discipline") Signed-off-by: Eric Dumazet Acked-by: John Fastabend Acked-by: John Fastabend Signed-off-by: David S. Miller diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e95b67c..657c133 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -643,18 +643,19 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, struct Qdisc *sch; if (!try_module_get(ops->owner)) - goto errout; + return NULL; sch = qdisc_alloc(dev_queue, ops); - if (IS_ERR(sch)) - goto errout; + if (IS_ERR(sch)) { + module_put(ops->owner); + return NULL; + } sch->parent = parentid; if (!ops->init || ops->init(sch, NULL) == 0) return sch; qdisc_destroy(sch); -errout: return NULL; } EXPORT_SYMBOL(qdisc_create_dflt); -- cgit v0.10.2 From 4f101c47791cdcb831b3ef1f831b1cc51e4fe03c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 24 Aug 2016 11:01:20 -0700 Subject: net: dsa: bcm_sf2: Fix race condition while unmasking interrupts We kept shadow copies of which interrupt sources we have enabled and disabled, but due to an order bug in how intrl2_mask_clear was defined, we could run into the following scenario: CPU0 CPU1 intrl2_1_mask_clear(..) sets INTRL2_CPU_MASK_CLEAR bcm_sf2_switch_1_isr read INTRL2_CPU_STATUS and masks with stale irq1_mask value updates irq1_mask value Which would make us loop again and again trying to process and interrupt we are not clearing since our copy of whether it was enabled before still indicates it was not. Fix this by updating the shadow copy first, and then unasking at the HW level. Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index 463bed8..dd446e4 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -205,8 +205,8 @@ static inline void name##_writeq(struct bcm_sf2_priv *priv, u64 val, \ static inline void intrl2_##which##_mask_clear(struct bcm_sf2_priv *priv, \ u32 mask) \ { \ - intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR); \ priv->irq##which##_mask &= ~(mask); \ + intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR); \ } \ static inline void intrl2_##which##_mask_set(struct bcm_sf2_priv *priv, \ u32 mask) \ -- cgit v0.10.2 From c3e70edd7c2eed6acd234627a6007627f5c76e8e Mon Sep 17 00:00:00 2001 From: Xander Huff Date: Wed, 24 Aug 2016 16:47:53 -0500 Subject: Revert "phy: IRQ cannot be shared" This reverts: commit 33c133cc7598 ("phy: IRQ cannot be shared") On hardware with multiple PHY devices hooked up to the same IRQ line, allow them to share it. Sergei Shtylyov says: "I'm not sure now what was the reason I concluded that the IRQ sharing was impossible... most probably I thought that the kernel IRQ handling code exited the loop over the IRQ actions once IRQ_HANDLED was returned -- which is obviously not so in reality..." Signed-off-by: Xander Huff Signed-off-by: Nathan Sullivan Signed-off-by: David S. Miller diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index c5dc2c36..c6f6683 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -722,8 +722,10 @@ phy_err: int phy_start_interrupts(struct phy_device *phydev) { atomic_set(&phydev->irq_disable, 0); - if (request_irq(phydev->irq, phy_interrupt, 0, "phy_interrupt", - phydev) < 0) { + if (request_irq(phydev->irq, phy_interrupt, + IRQF_SHARED, + "phy_interrupt", + phydev) < 0) { pr_warn("%s: Can't get IRQ %d (PHY)\n", phydev->mdio.bus->name, phydev->irq); phydev->irq = PHY_POLL; -- cgit v0.10.2 From f38ff2ee7727994685494bcc4d7c274b35b5418a Mon Sep 17 00:00:00 2001 From: Anjali Singhai Jain Date: Wed, 24 Aug 2016 17:51:53 -0700 Subject: i40e: Change some init flow for the client This change makes a common flow for Client instance open during init and reset path. The Client subtask can handle both the cases instead of making a separate notify_client_of_open call. Also it may fix a bug during reset where the service task was leaking some memory and causing issues. Change-Id: I7232a32fd52b82e863abb54266fa83122f80a0cd Signed-off-by: Anjali Singhai Jain Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index e1370c5..618f184 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -199,6 +199,7 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi) void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi) { struct i40e_client_instance *cdev; + int ret = 0; if (!vsi) return; @@ -211,7 +212,14 @@ void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi) "Cannot locate client instance open routine\n"); continue; } - cdev->client->ops->open(&cdev->lan_info, cdev->client); + if (!(test_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state))) { + ret = cdev->client->ops->open(&cdev->lan_info, + cdev->client); + if (!ret) + set_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state); + } } } mutex_unlock(&i40e_client_instance_mutex); @@ -407,12 +415,14 @@ struct i40e_vsi *i40e_vsi_lookup(struct i40e_pf *pf, * i40e_client_add_instance - add a client instance struct to the instance list * @pf: pointer to the board struct * @client: pointer to a client struct in the client list. + * @existing: if there was already an existing instance * - * Returns cdev ptr on success, NULL on failure + * Returns cdev ptr on success or if already exists, NULL on failure **/ static struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf, - struct i40e_client *client) + struct i40e_client *client, + bool *existing) { struct i40e_client_instance *cdev; struct netdev_hw_addr *mac = NULL; @@ -421,7 +431,7 @@ struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf, mutex_lock(&i40e_client_instance_mutex); list_for_each_entry(cdev, &i40e_client_instances, list) { if ((cdev->lan_info.pf == pf) && (cdev->client == client)) { - cdev = NULL; + *existing = true; goto out; } } @@ -505,6 +515,7 @@ void i40e_client_subtask(struct i40e_pf *pf) { struct i40e_client_instance *cdev; struct i40e_client *client; + bool existing = false; int ret = 0; if (!(pf->flags & I40E_FLAG_SERVICE_CLIENT_REQUESTED)) @@ -528,18 +539,25 @@ void i40e_client_subtask(struct i40e_pf *pf) /* check if L2 VSI is up, if not we are not ready */ if (test_bit(__I40E_DOWN, &pf->vsi[pf->lan_vsi]->state)) continue; + } else { + dev_warn(&pf->pdev->dev, "This client %s is being instanciated at probe\n", + client->name); } /* Add the client instance to the instance list */ - cdev = i40e_client_add_instance(pf, client); + cdev = i40e_client_add_instance(pf, client, &existing); if (!cdev) continue; - /* Also up the ref_cnt of no. of instances of this client */ - atomic_inc(&client->ref_cnt); - dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n", - client->name, pf->hw.pf_id, - pf->hw.bus.device, pf->hw.bus.func); + if (!existing) { + /* Also up the ref_cnt for no. of instances of this + * client. + */ + atomic_inc(&client->ref_cnt); + dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n", + client->name, pf->hw.pf_id, + pf->hw.bus.device, pf->hw.bus.func); + } /* Send an Open request to the client */ atomic_inc(&cdev->ref_cnt); @@ -588,7 +606,8 @@ int i40e_lan_add_device(struct i40e_pf *pf) pf->hw.pf_id, pf->hw.bus.device, pf->hw.bus.func); /* Since in some cases register may have happened before a device gets - * added, we can schedule a subtask to go initiate the clients. + * added, we can schedule a subtask to go initiate the clients if + * they can be launched at probe time. */ pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED; i40e_service_event_schedule(pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index c6ac7a6..828ed28 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5431,7 +5431,6 @@ int i40e_open(struct net_device *netdev) wr32(&pf->hw, I40E_GLLAN_TSOMSK_L, be32_to_cpu(TCP_FLAG_CWR) >> 16); udp_tunnel_get_rx_info(netdev); - i40e_notify_client_of_netdev_open(vsi); return 0; } -- cgit v0.10.2 From b628d611a2a53858263fc419dba552f32431dba4 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 25 Aug 2016 09:45:39 +0800 Subject: 8139cp: Fix one possible deadloop in cp_rx_poll When cp_rx_poll does not get enough packet, it will check the rx interrupt status again. If so, it will jumpt to rx_status_loop again. But the goto jump resets the rx variable as zero too. As a result, it causes one possible deadloop. Assume this case, rx_status_loop only gets the packet count which is less than budget, and (cpr16(IntrStatus) & cp_rx_intr_mask) condition is always true. It causes the deadloop happens and system is blocked. Signed-off-by: Gao Feng Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index deae10d..5297bf7 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -467,8 +467,8 @@ static int cp_rx_poll(struct napi_struct *napi, int budget) unsigned int rx_tail = cp->rx_tail; int rx; -rx_status_loop: rx = 0; +rx_status_loop: cpw16(IntrStatus, cp_rx_intr_mask); while (rx < budget) { -- cgit v0.10.2 From c234af5875ffeab39d5a2c4230a477a35987a484 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 25 Aug 2016 07:51:10 +0100 Subject: net: hns: dereference ppe_cb->ppe_common_cb if it is non-null ppe_cb->ppe_common_cb is being dereferenced before a null check is being made on it. If ppe_cb->ppe_common_cb is null then we end up with a null pointer dereference when assigning dsaf_dev. Fix this by moving the initialisation of dsaf_dev once we know ppe_cb->ppe_common_cb is OK to dereference. Signed-off-by: Colin Ian King Acked-by: Yisen Zhuang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index ff8b6a4..6ea8722 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -328,9 +328,10 @@ static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb) static void hns_ppe_uninit_hw(struct hns_ppe_cb *ppe_cb) { u32 port; - struct dsaf_device *dsaf_dev = ppe_cb->ppe_common_cb->dsaf_dev; if (ppe_cb->ppe_common_cb) { + struct dsaf_device *dsaf_dev = ppe_cb->ppe_common_cb->dsaf_dev; + port = ppe_cb->index; dsaf_dev->misc_op->ppe_srst(dsaf_dev, port, 0); } -- cgit v0.10.2 From c15e07b02bf0450bc8e60f2cc51cb42daa371417 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 25 Aug 2016 18:30:52 +0200 Subject: team: loadbalance: push lacpdus to exact delivery When team is in bridge and LACP is utilized, LACPDU packets are pushed to userspace using raw socket and there they are processed. However, since 8626c56c8279b, LACPDU skbs are dropped by bridge rx_handler so they never reach packet handlers in rx path. Fix this by explicity treat LACPDUs to be pushed to exact delivery in team rx_handler. Reported-by: Ido Schimmel Fixes: 8626c56c8279b ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index cdb19b3..b228bea 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -14,9 +14,23 @@ #include #include #include +#include #include #include +static rx_handler_result_t lb_receive(struct team *team, struct team_port *port, + struct sk_buff *skb) +{ + if (unlikely(skb->protocol == htons(ETH_P_SLOW))) { + /* LACPDU packets should go to exact delivery */ + const unsigned char *dest = eth_hdr(skb)->h_dest; + + if (is_link_local_ether_addr(dest) && dest[5] == 0x02) + return RX_HANDLER_EXACT; + } + return RX_HANDLER_ANOTHER; +} + struct lb_priv; typedef struct team_port *lb_select_tx_port_func_t(struct team *, @@ -652,6 +666,7 @@ static const struct team_mode_ops lb_mode_ops = { .port_enter = lb_port_enter, .port_leave = lb_port_leave, .port_disabled = lb_port_disabled, + .receive = lb_receive, .transmit = lb_transmit, }; -- cgit v0.10.2 From e70c70c38d7a5ced76fc8b1c4a7ccee76e9c2911 Mon Sep 17 00:00:00 2001 From: Andrew Rybchenko Date: Fri, 26 Aug 2016 11:19:34 +0100 Subject: sfc: fix potential stack corruption from running past stat bitmask On 32-bit systems, mask is only an array of 3 longs, not 4, so don't try to write to mask[3]. Also include build-time checks in case the size of the bitmask changes. Fixes: 3c36a2aded8c ("sfc: display vadaptor statistics for all interfaces") Signed-off-by: Edward Cree Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index f658fee..e00a669 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1517,13 +1517,14 @@ static void efx_ef10_get_stat_mask(struct efx_nic *efx, unsigned long *mask) } #if BITS_PER_LONG == 64 + BUILD_BUG_ON(BITS_TO_LONGS(EF10_STAT_COUNT) != 2); mask[0] = raw_mask[0]; mask[1] = raw_mask[1]; #else + BUILD_BUG_ON(BITS_TO_LONGS(EF10_STAT_COUNT) != 3); mask[0] = raw_mask[0] & 0xffffffff; mask[1] = raw_mask[0] >> 32; mask[2] = raw_mask[1] & 0xffffffff; - mask[3] = raw_mask[1] >> 32; #endif } -- cgit v0.10.2 From 9dbeea7f08f3784b152d9fb3b86beb34aad77c72 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Aug 2016 08:51:39 -0700 Subject: rhashtable: fix a memory leak in alloc_bucket_locks() If vmalloc() was successful, do not attempt a kmalloc_array() Fixes: 4cf0b354d92e ("rhashtable: avoid large lock-array allocations") Reported-by: CAI Qian Signed-off-by: Eric Dumazet Cc: Florian Westphal Acked-by: Herbert Xu Tested-by: CAI Qian Signed-off-by: David S. Miller diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 5ba520b..56054e5 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -77,17 +77,18 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, size = min_t(unsigned int, size, tbl->size >> 1); if (sizeof(spinlock_t) != 0) { + tbl->locks = NULL; #ifdef CONFIG_NUMA if (size * sizeof(spinlock_t) > PAGE_SIZE && gfp == GFP_KERNEL) tbl->locks = vmalloc(size * sizeof(spinlock_t)); - else #endif if (gfp != GFP_KERNEL) gfp |= __GFP_NOWARN | __GFP_NORETRY; - tbl->locks = kmalloc_array(size, sizeof(spinlock_t), - gfp); + if (!tbl->locks) + tbl->locks = kmalloc_array(size, sizeof(spinlock_t), + gfp); if (!tbl->locks) return -ENOMEM; for (i = 0; i < size; i++) -- cgit v0.10.2 From fe4c988bdd1cc60402a4e3ca3976a686ea991b5a Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 29 Aug 2016 01:13:42 +0300 Subject: net/mlx5e: Limit UMR length to the device's limitation ConnectX-4 UMR (User Memory Region) MTT translation table offset in WQE is limited to U16_MAX, before this patch we ignored that limitation and requested the maximum possible UMR translation length that the netdev might need (MAX channels * MAX pages per channel). In case of a system with #cores > 32 and when linear WQE allocation fails, falling back to using UMR WQEs will cause the RQ (Receive Queue) to get stuck. Here we limit UMR length to min(U16_MAX, max required pages) (while considering the required alignments) on driver load, by default U16_MAX is sufficient since the default RX rings value guarantees that we are in range, dynamically (on set_ringparam/set_channels) we will check if the new required UMR length (num mtts) is still in range, if not, fail the request. Fixes: bc77b240b3c5 ('net/mlx5e: Add fragmented memory support for RX multi packet WQE') Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 1b495ef..d63a1b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -73,8 +73,12 @@ #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) #define MLX5_MPWRQ_STRIDES_PER_PAGE (MLX5_MPWRQ_NUM_STRIDES >> \ MLX5_MPWRQ_WQE_PAGE_ORDER) -#define MLX5_CHANNEL_MAX_NUM_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8) * \ - BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW)) + +#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2) +#define MLX5E_REQUIRED_MTTS(rqs, wqes)\ + (rqs * wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8)) +#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) <= U16_MAX) + #define MLX5_UMR_ALIGN (2048) #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (128) @@ -304,6 +308,7 @@ struct mlx5e_rq { unsigned long state; int ix; + u32 mpwqe_mtt_offset; struct mlx5e_rx_am am; /* Adaptive Moderation */ @@ -814,11 +819,6 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) MLX5E_MAX_NUM_CHANNELS); } -static inline int mlx5e_get_mtt_octw(int npages) -{ - return ALIGN(npages, 8) / 2; -} - extern const struct ethtool_ops mlx5e_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 4a3757e..9cfe408 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -373,6 +373,7 @@ static int mlx5e_set_ringparam(struct net_device *dev, u16 min_rx_wqes; u8 log_rq_size; u8 log_sq_size; + u32 num_mtts; int err = 0; if (param->rx_jumbo_pending) { @@ -397,6 +398,15 @@ static int mlx5e_set_ringparam(struct net_device *dev, 1 << mlx5_max_log_rq_size(rq_wq_type)); return -EINVAL; } + + num_mtts = MLX5E_REQUIRED_MTTS(priv->params.num_channels, param->rx_pending); + if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + !MLX5E_VALID_NUM_MTTS(num_mtts)) { + netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n", + __func__, param->rx_pending); + return -EINVAL; + } + if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { netdev_info(dev, "%s: tx_pending (%d) < min (%d)\n", __func__, param->tx_pending, @@ -454,6 +464,7 @@ static int mlx5e_set_channels(struct net_device *dev, unsigned int count = ch->combined_count; bool arfs_enabled; bool was_opened; + u32 num_mtts; int err = 0; if (!count) { @@ -472,6 +483,14 @@ static int mlx5e_set_channels(struct net_device *dev, return -EINVAL; } + num_mtts = MLX5E_REQUIRED_MTTS(count, BIT(priv->params.log_rq_size)); + if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + !MLX5E_VALID_NUM_MTTS(num_mtts)) { + netdev_info(dev, "%s: rx count (%d) request can't be satisfied, try to reduce.\n", + __func__, count); + return -EINVAL; + } + if (priv->params.num_channels == count) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 03d944c..65360b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -340,6 +340,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; + rq->mpwqe_mtt_offset = c->ix * + MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size)); + rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; @@ -3233,8 +3236,8 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) struct mlx5_create_mkey_mbox_in *in; struct mlx5_mkey_seg *mkc; int inlen = sizeof(*in); - u64 npages = - priv->profile->max_nch(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS; + u64 npages = MLX5E_REQUIRED_MTTS(priv->profile->max_nch(mdev), + BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW)); int err; in = mlx5_vzalloc(inlen); @@ -3248,10 +3251,12 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) MLX5_PERM_LOCAL_WRITE | MLX5_ACCESS_MODE_MTT; + npages = min_t(u32, ALIGN(U16_MAX, 4) * 2, npages); + mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); mkc->flags_pd = cpu_to_be32(mdev->mlx5e_res.pdn); mkc->len = cpu_to_be64(npages << PAGE_SHIFT); - mkc->xlt_oct_size = cpu_to_be32(mlx5e_get_mtt_octw(npages)); + mkc->xlt_oct_size = cpu_to_be32(MLX5_MTT_OCTW(npages)); mkc->log2_page_size = PAGE_SHIFT; err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen, NULL, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 9f2a16a..bdc9e33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -324,9 +324,9 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev, } } -static u16 mlx5e_get_wqe_mtt_offset(u16 rq_ix, u16 wqe_ix) +static u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) { - return rq_ix * MLX5_CHANNEL_MAX_NUM_MTTS + + return rq->mpwqe_mtt_offset + wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); } @@ -340,7 +340,7 @@ static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5_wqe_data_seg *dseg = &wqe->data; struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); - u16 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix); + u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); memset(wqe, 0, sizeof(*wqe)); cseg->opmod_idx_opcode = @@ -353,9 +353,9 @@ static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; ucseg->klm_octowords = - cpu_to_be16(mlx5e_get_mtt_octw(MLX5_MPWRQ_PAGES_PER_WQE)); + cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); ucseg->bsf_octowords = - cpu_to_be16(mlx5e_get_mtt_octw(umr_wqe_mtt_offset)); + cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset)); ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); dseg->lkey = sq->mkey_be; @@ -423,7 +423,7 @@ static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq, { struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; int mtt_sz = mlx5e_get_wqe_mtt_sz(); - u32 dma_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix) << PAGE_SHIFT; + u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT; int i; wi->umr.dma_info = kmalloc(sizeof(*wi->umr.dma_info) * -- cgit v0.10.2 From f2fde18c52a7367a8f6cf6855e2a7174e601c8ee Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 29 Aug 2016 01:13:43 +0300 Subject: net/mlx5e: Don't wait for RQ completions on close This will significantly reduce receive queue flush time on interface down. Instead of asking the firmware to flush the RQ (Receive Queue) via asynchronous completions when moved to error, we handle RQ flush manually (mlx5e_free_rx_descs) same as we did when RQ flush got timed out. This will reduce RQs flush time and speedup interface down procedure (ifconfig down) from 6 sec to 0.3 sec on a 48 cores system. Moved mlx5e_free_rx_descs en_main.c where it is needed, to keep en_rx.c free form non critical data path code for better code locality. Fixes: 6cd392a082de ('net/mlx5e: Handle RQ flush in error cases') Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index d63a1b8..26a7ec7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -223,9 +223,8 @@ struct mlx5e_tstamp { }; enum { - MLX5E_RQ_STATE_POST_WQES_ENABLE, + MLX5E_RQ_STATE_FLUSH, MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, - MLX5E_RQ_STATE_FLUSH_TIMEOUT, MLX5E_RQ_STATE_AM, }; @@ -703,7 +702,6 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); void mlx5e_free_tx_descs(struct mlx5e_sq *sq); -void mlx5e_free_rx_descs(struct mlx5e_rq *rq); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 65360b1..2463eba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -431,7 +431,6 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); - MLX5_SET(rqc, rqc, flush_in_error_en, 1); MLX5_SET(rqc, rqc, vsd, priv->params.vlan_strip_disable); MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); @@ -528,6 +527,23 @@ static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) return -ETIMEDOUT; } +static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->wq; + struct mlx5e_rx_wqe *wqe; + __be16 wqe_ix_be; + u16 wqe_ix; + + while (!mlx5_wq_ll_is_empty(wq)) { + wqe_ix_be = *wq->tail_next; + wqe_ix = be16_to_cpu(wqe_ix_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix); + rq->dealloc_wqe(rq, wqe_ix); + mlx5_wq_ll_pop(&rq->wq, wqe_ix_be, + &wqe->next.next_wqe_index); + } +} + static int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) @@ -551,8 +567,6 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (param->am_enabled) set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; sq->ico_wqe_info[pi].num_wqebbs = 1; mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */ @@ -569,23 +583,8 @@ err_destroy_rq: static void mlx5e_close_rq(struct mlx5e_rq *rq) { - int tout = 0; - int err; - - clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); + set_bit(MLX5E_RQ_STATE_FLUSH, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ - - err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); - while (!mlx5_wq_ll_is_empty(&rq->wq) && !err && - tout++ < MLX5_EN_QP_FLUSH_MAX_ITER) - msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT); - - if (err || tout == MLX5_EN_QP_FLUSH_MAX_ITER) - set_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state); - - /* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */ - napi_synchronize(&rq->channel->napi); - cancel_work_sync(&rq->am.work); mlx5e_disable_rq(rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index bdc9e33..fee1e47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -595,26 +595,9 @@ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) wi->free_wqe(rq, wi); } -void mlx5e_free_rx_descs(struct mlx5e_rq *rq) -{ - struct mlx5_wq_ll *wq = &rq->wq; - struct mlx5e_rx_wqe *wqe; - __be16 wqe_ix_be; - u16 wqe_ix; - - while (!mlx5_wq_ll_is_empty(wq)) { - wqe_ix_be = *wq->tail_next; - wqe_ix = be16_to_cpu(wqe_ix_be); - wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix); - rq->dealloc_wqe(rq, wqe_ix); - mlx5_wq_ll_pop(&rq->wq, wqe_ix_be, - &wqe->next.next_wqe_index); - } -} - #define RQ_CANNOT_POST(rq) \ - (!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state) || \ - test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) + (test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state) || \ + test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { @@ -916,7 +899,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); int work_done = 0; - if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state))) + if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) return 0; if (cq->decmprs_left) -- cgit v0.10.2 From 8484f9ed13b26043be80ff5774506024956eae8f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 29 Aug 2016 01:13:44 +0300 Subject: net/mlx5e: Don't post fragmented MPWQE when RQ is disabled ICO (Internal control operations) SQ (Send Queue) is closed/disabled after RQ (Receive Queue). After RQ is closed an ICO SQ completion might post a fragmented MPWQE (Multi Packet Work Queue Element) into that RQ. As on regular RQ post, check if we are allowed to post to that RQ (RQ is enabled). Cleanup in-progress UMR MPWQE on mlx5e_free_rx_descs if needed. Fixes: bc77b240b3c5 ('net/mlx5e: Add fragmented memory support for RX multi packet WQE') Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2463eba..e259eaa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -534,6 +534,10 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) __be16 wqe_ix_be; u16 wqe_ix; + /* UMR WQE (if in progress) is always at wq->head */ + if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) + mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]); + while (!mlx5_wq_ll_is_empty(wq)) { wqe_ix_be = *wq->tail_next; wqe_ix = be16_to_cpu(wqe_ix_be); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index fee1e47..b6f8ebb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -506,6 +506,12 @@ void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + + if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) { + mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]); + return; + } + mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); rq->stats.mpwqe_frag++; -- cgit v0.10.2 From 6e8dd6d6f4bd2fd6fefdbf2e73bf251e36db59af Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 29 Aug 2016 01:13:45 +0300 Subject: net/mlx5e: Don't wait for SQ completions on close Instead of asking the firmware to flush the SQ (Send Queue) via asynchronous completions when moved to error, we handle SQ flush manually (mlx5e_free_tx_descs) same as we did when SQ flush got timed out or on tx_timeout. This will reduce SQs flush time and speedup interface down procedure. Moved mlx5e_free_tx_descs to the end of en_tx.c for tx critical code locality. Fixes: 29429f3300a3 ('net/mlx5e: Timeout if SQ doesn't flush during close') Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 26a7ec7..bf722aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -369,9 +369,8 @@ struct mlx5e_sq_dma { }; enum { - MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, + MLX5E_SQ_STATE_FLUSH, MLX5E_SQ_STATE_BF_ENABLE, - MLX5E_SQ_STATE_TX_TIMEOUT, }; struct mlx5e_ico_wqe_info { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e259eaa..297781a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -39,13 +39,6 @@ #include "eswitch.h" #include "vxlan.h" -enum { - MLX5_EN_QP_FLUSH_TIMEOUT_MS = 5000, - MLX5_EN_QP_FLUSH_MSLEEP_QUANT = 20, - MLX5_EN_QP_FLUSH_MAX_ITER = MLX5_EN_QP_FLUSH_TIMEOUT_MS / - MLX5_EN_QP_FLUSH_MSLEEP_QUANT, -}; - struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; struct mlx5_wq_param wq; @@ -827,7 +820,6 @@ static int mlx5e_open_sq(struct mlx5e_channel *c, goto err_disable_sq; if (sq->txq) { - set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); netdev_tx_reset_queue(sq->txq); netif_tx_start_queue(sq->txq); } @@ -851,38 +843,20 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq) static void mlx5e_close_sq(struct mlx5e_sq *sq) { - int tout = 0; - int err; + set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state); + /* prevent netif_tx_wake_queue */ + napi_synchronize(&sq->channel->napi); if (sq->txq) { - clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); - /* prevent netif_tx_wake_queue */ - napi_synchronize(&sq->channel->napi); netif_tx_disable_queue(sq->txq); - /* ensure hw is notified of all pending wqes */ + /* last doorbell out, godspeed .. */ if (mlx5e_sq_has_room_for(sq, 1)) mlx5e_send_nop(sq, true); - - err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, - MLX5_SQC_STATE_ERR, false, 0); - if (err) - set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state); } - /* wait till sq is empty, unless a TX timeout occurred on this SQ */ - while (sq->cc != sq->pc && - !test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)) { - msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT); - if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER) - set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state); - } - - /* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */ - napi_synchronize(&sq->channel->napi); - - mlx5e_free_tx_descs(sq); mlx5e_disable_sq(sq); + mlx5e_free_tx_descs(sq); mlx5e_destroy_sq(sq); } @@ -2802,7 +2776,7 @@ static void mlx5e_tx_timeout(struct net_device *dev) if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i))) continue; sched_work = true; - set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state); + set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state); netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n", i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index e073bf59..5f209ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -394,35 +394,6 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) return mlx5e_sq_xmit(sq, skb); } -void mlx5e_free_tx_descs(struct mlx5e_sq *sq) -{ - struct mlx5e_tx_wqe_info *wi; - struct sk_buff *skb; - u16 ci; - int i; - - while (sq->cc != sq->pc) { - ci = sq->cc & sq->wq.sz_m1; - skb = sq->skb[ci]; - wi = &sq->wqe_info[ci]; - - if (!skb) { /* nop */ - sq->cc++; - continue; - } - - for (i = 0; i < wi->num_dma; i++) { - struct mlx5e_sq_dma *dma = - mlx5e_dma_get(sq, sq->dma_fifo_cc++); - - mlx5e_tx_dma_unmap(sq->pdev, dma); - } - - dev_kfree_skb_any(skb); - sq->cc += wi->num_wqebbs; - } -} - bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_sq *sq; @@ -434,7 +405,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sq = container_of(cq, struct mlx5e_sq, cq); - if (unlikely(test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state))) + if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) return false; npkts = 0; @@ -512,11 +483,39 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) netdev_tx_completed_queue(sq->txq, npkts, nbytes); if (netif_tx_queue_stopped(sq->txq) && - mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM) && - likely(test_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state))) { - netif_tx_wake_queue(sq->txq); - sq->stats.wake++; + mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM)) { + netif_tx_wake_queue(sq->txq); + sq->stats.wake++; } return (i == MLX5E_TX_CQ_POLL_BUDGET); } + +void mlx5e_free_tx_descs(struct mlx5e_sq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + struct sk_buff *skb; + u16 ci; + int i; + + while (sq->cc != sq->pc) { + ci = sq->cc & sq->wq.sz_m1; + skb = sq->skb[ci]; + wi = &sq->wqe_info[ci]; + + if (!skb) { /* nop */ + sq->cc++; + continue; + } + + for (i = 0; i < wi->num_dma; i++) { + struct mlx5e_sq_dma *dma = + mlx5e_dma_get(sq, sq->dma_fifo_cc++); + + mlx5e_tx_dma_unmap(sq->pdev, dma); + } + + dev_kfree_skb_any(skb); + sq->cc += wi->num_wqebbs; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 64ae2e8..9bf33bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -51,16 +51,18 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq) static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) { + struct mlx5e_sq *sq = container_of(cq, struct mlx5e_sq, cq); struct mlx5_wq_cyc *wq; struct mlx5_cqe64 *cqe; - struct mlx5e_sq *sq; u16 sqcc; + if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + return; + cqe = mlx5e_get_cqe(cq); if (likely(!cqe)) return; - sq = container_of(cq, struct mlx5e_sq, cq); wq = &sq->wq; /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), -- cgit v0.10.2 From cc8e9ebf952699cb6870f1366a4920d05b036e31 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 29 Aug 2016 01:13:46 +0300 Subject: net/mlx5e: Fix ethtool -g/G rx ring parameter report with striding RQ The driver RQ has two possible configurations: striding RQ and non-striding RQ. Until this patch, the driver always reported the number of hardware WQEs (ring descriptors). For non striding RQ configuration, this was OK since we have one WQE per pending packet For striding RQ, multiple packets can fit into one WQE. For better user experience we normalize the rx_pending parameter (size of wqe/mtu) as the average ring size in case of striding RQ. Fixes: 461017cb006a ('net/mlx5e: Support RX multi-packet WQE ...') Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 9cfe408..d0cf8fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -352,15 +352,61 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, sq_stats_desc, j); } +static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type, + int num_wqe) +{ + int packets_per_wqe; + int stride_size; + int num_strides; + int wqe_size; + + if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + return num_wqe; + + stride_size = 1 << priv->params.mpwqe_log_stride_sz; + num_strides = 1 << priv->params.mpwqe_log_num_strides; + wqe_size = stride_size * num_strides; + + packets_per_wqe = wqe_size / + ALIGN(ETH_DATA_LEN, stride_size); + return (1 << (order_base_2(num_wqe * packets_per_wqe) - 1)); +} + +static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type, + int num_packets) +{ + int packets_per_wqe; + int stride_size; + int num_strides; + int wqe_size; + int num_wqes; + + if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + return num_packets; + + stride_size = 1 << priv->params.mpwqe_log_stride_sz; + num_strides = 1 << priv->params.mpwqe_log_num_strides; + wqe_size = stride_size * num_strides; + + num_packets = (1 << order_base_2(num_packets)); + + packets_per_wqe = wqe_size / + ALIGN(ETH_DATA_LEN, stride_size); + num_wqes = DIV_ROUND_UP(num_packets, packets_per_wqe); + return 1 << (order_base_2(num_wqes)); +} + static void mlx5e_get_ringparam(struct net_device *dev, struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); int rq_wq_type = priv->params.rq_wq_type; - param->rx_max_pending = 1 << mlx5_max_log_rq_size(rq_wq_type); + param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, + 1 << mlx5_max_log_rq_size(rq_wq_type)); param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; - param->rx_pending = 1 << priv->params.log_rq_size; + param->rx_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, + 1 << priv->params.log_rq_size); param->tx_pending = 1 << priv->params.log_sq_size; } @@ -370,6 +416,9 @@ static int mlx5e_set_ringparam(struct net_device *dev, struct mlx5e_priv *priv = netdev_priv(dev); bool was_opened; int rq_wq_type = priv->params.rq_wq_type; + u32 rx_pending_wqes; + u32 min_rq_size; + u32 max_rq_size; u16 min_rx_wqes; u8 log_rq_size; u8 log_sq_size; @@ -386,20 +435,29 @@ static int mlx5e_set_ringparam(struct net_device *dev, __func__); return -EINVAL; } - if (param->rx_pending < (1 << mlx5_min_log_rq_size(rq_wq_type))) { + + min_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, + 1 << mlx5_min_log_rq_size(rq_wq_type)); + max_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, + 1 << mlx5_max_log_rq_size(rq_wq_type)); + rx_pending_wqes = mlx5e_packets_to_rx_wqes(priv, rq_wq_type, + param->rx_pending); + + if (param->rx_pending < min_rq_size) { netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n", __func__, param->rx_pending, - 1 << mlx5_min_log_rq_size(rq_wq_type)); + min_rq_size); return -EINVAL; } - if (param->rx_pending > (1 << mlx5_max_log_rq_size(rq_wq_type))) { + if (param->rx_pending > max_rq_size) { netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n", __func__, param->rx_pending, - 1 << mlx5_max_log_rq_size(rq_wq_type)); + max_rq_size); return -EINVAL; } - num_mtts = MLX5E_REQUIRED_MTTS(priv->params.num_channels, param->rx_pending); + num_mtts = MLX5E_REQUIRED_MTTS(priv->params.num_channels, + rx_pending_wqes); if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && !MLX5E_VALID_NUM_MTTS(num_mtts)) { netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n", @@ -420,9 +478,9 @@ static int mlx5e_set_ringparam(struct net_device *dev, return -EINVAL; } - log_rq_size = order_base_2(param->rx_pending); + log_rq_size = order_base_2(rx_pending_wqes); log_sq_size = order_base_2(param->tx_pending); - min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, param->rx_pending); + min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, rx_pending_wqes); if (log_rq_size == priv->params.log_rq_size && log_sq_size == priv->params.log_sq_size && -- cgit v0.10.2 From c8cf78fe100b0d152a1932327c24cefc0ba4bdbe Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 29 Aug 2016 01:13:47 +0300 Subject: net/mlx5e: Add ethtool counter for TX xmit_more Add a counter in ethtool for the number of times that TX xmit_more was used. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 297781a..2459c7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -155,6 +155,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->tx_queue_stopped += sq_stats->stopped; s->tx_queue_wake += sq_stats->wake; s->tx_queue_dropped += sq_stats->dropped; + s->tx_xmit_more += sq_stats->xmit_more; s->tx_csum_partial_inner += sq_stats->csum_partial_inner; tx_offload_none += sq_stats->csum_none; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 7b9d8a9..499487c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -70,6 +70,7 @@ struct mlx5e_sw_stats { u64 tx_queue_stopped; u64 tx_queue_wake; u64 tx_queue_dropped; + u64 tx_xmit_more; u64 rx_wqe_err; u64 rx_mpwqe_filler; u64 rx_mpwqe_frag; @@ -101,6 +102,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) }, @@ -298,6 +300,7 @@ struct mlx5e_sq_stats { /* commonly accessed in data path */ u64 packets; u64 bytes; + u64 xmit_more; u64 tso_packets; u64 tso_bytes; u64 tso_inner_packets; @@ -324,6 +327,7 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, }; #define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 5f209ad..988eca9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -375,6 +375,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) sq->stats.packets++; sq->stats.bytes += num_bytes; + sq->stats.xmit_more += skb->xmit_more; return NETDEV_TX_OK; dma_unmap_wqe_err: -- cgit v0.10.2 From bf50082c15eb2bc47d1922e70f424c57f36646d5 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 29 Aug 2016 01:13:48 +0300 Subject: net/mlx5e: Fix memory leak if refreshing TIRs fails Free 'in' command object also when mlx5_core_modify_tir fails. Fixes: 724b2aa15126 ("net/mlx5e: TIRs management refactoring") Signed-off-by: Kamal Heib Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 673043c..9cce153 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -139,7 +139,7 @@ int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev) struct mlx5e_tir *tir; void *in; int inlen; - int err; + int err = 0; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = mlx5_vzalloc(inlen); @@ -151,10 +151,11 @@ int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev) list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen); if (err) - return err; + goto out; } +out: kvfree(in); - return 0; + return err; } -- cgit v0.10.2 From 1722b9694ecfbc602865017c3fa6da0e3ec234d8 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 29 Aug 2016 01:13:49 +0300 Subject: net/mlx5: Add error prints when validate ETS failed Upon set ETS failure due to user invalid input, add error prints to specify the exact error to the user. Fixes: cdcf11212b22 ('net/mlx5e: Validate BW weight values of ETS') Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index caa9a3c..762af16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -127,29 +127,40 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) return mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw); } -static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets) +static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, + struct ieee_ets *ets) { int bw_sum = 0; int i; /* Validate Priority */ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) + if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) { + netdev_err(netdev, + "Failed to validate ETS: priority value greater than max(%d)\n", + MLX5E_MAX_PRIORITY); return -EINVAL; + } } /* Validate Bandwidth Sum */ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { - if (!ets->tc_tx_bw[i]) + if (!ets->tc_tx_bw[i]) { + netdev_err(netdev, + "Failed to validate ETS: BW 0 is illegal\n"); return -EINVAL; + } bw_sum += ets->tc_tx_bw[i]; } } - if (bw_sum != 0 && bw_sum != 100) + if (bw_sum != 0 && bw_sum != 100) { + netdev_err(netdev, + "Failed to validate ETS: BW sum is illegal\n"); return -EINVAL; + } return 0; } @@ -159,7 +170,7 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); int err; - err = mlx5e_dbcnl_validate_ets(ets); + err = mlx5e_dbcnl_validate_ets(netdev, ets); if (err) return err; -- cgit v0.10.2 From e5835f2833b12808c53aa621d1d3aa085706b5b3 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 29 Aug 2016 01:13:50 +0300 Subject: net/mlx5: Increase number of ethtool steering priorities Ethtool has 11 flow tables, each flow table has its own priority. Increase the number of priorities to be aligned with the number of flow tables. Fixes: 1174fce8d141 ('net/mlx5e: Support l3/l4 flow type specs in ethtool flow steering') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 75bb8c8..3d6c1f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -80,7 +80,7 @@ LEFTOVERS_NUM_PRIOS) #define ETHTOOL_PRIO_NUM_LEVELS 1 -#define ETHTOOL_NUM_PRIOS 10 +#define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) /* Vlan, mac, ttc, aRFS */ #define KERNEL_NIC_PRIO_NUM_LEVELS 4 -- cgit v0.10.2 From 7d13eca09ed5e477f6ecfd97a35058762228b5e4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 27 Aug 2016 15:34:20 -0700 Subject: Documentation: networking: dsa: Remove platform device TODO Since commit 83c0afaec7b7 ("net: dsa: Add new binding implementation"), the shortcomings of the dsa platform device have been addressed, remove that TODO item. Signed-off-by: Florian Fainelli Acked-by: Andrew Lunn Signed-off-by: David S. Miller diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index 9d05ed7..f20c884 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -587,26 +587,6 @@ of DSA, would be the its port-based VLAN, used by the associated bridge device. TODO ==== -The platform device problem ---------------------------- -DSA is currently implemented as a platform device driver which is far from ideal -as was discussed in this thread: - -http://permalink.gmane.org/gmane.linux.network/329848 - -This basically prevents the device driver model to be properly used and applied, -and support non-MDIO, non-MMIO Ethernet connected switches. - -Another problem with the platform device driver approach is that it prevents the -use of a modular switch drivers build due to a circular dependency, illustrated -here: - -http://comments.gmane.org/gmane.linux.network/345803 - -Attempts of reworking this has been done here: - -https://lwn.net/Articles/643149/ - Making SWITCHDEV and DSA converge towards an unified codebase ------------------------------------------------------------- -- cgit v0.10.2 From 2fb04fdf30192ff1e2b5834e9b7745889ea8bbcb Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 27 Aug 2016 17:33:03 +0100 Subject: net: smc91x: fix SMC accesses Commit b70661c70830 ("net: smc91x: use run-time configuration on all ARM machines") broke some ARM platforms through several mistakes. Firstly, the access size must correspond to the following rule: (a) at least one of 16-bit or 8-bit access size must be supported (b) 32-bit accesses are optional, and may be enabled in addition to the above. Secondly, it provides no emulation of 16-bit accesses, instead blindly making 16-bit accesses even when the platform specifies that only 8-bit is supported. Reorganise smc91x.h so we can make use of the existing 16-bit access emulation already provided - if 16-bit accesses are supported, use 16-bit accesses directly, otherwise if 8-bit accesses are supported, use the provided 16-bit access emulation. If neither, BUG(). This exactly reflects the driver behaviour prior to the commit being fixed. Since the conversion incorrectly cut down the available access sizes on several platforms, we also need to go through every platform and fix up the overly-restrictive access size: Arnd assumed that if a platform can perform 32-bit, 16-bit and 8-bit accesses, then only a 32-bit access size needed to be specified - not so, all available access sizes must be specified. This likely fixes some performance regressions in doing this: if a platform does not support 8-bit accesses, 8-bit accesses have been emulated by performing a 16-bit read-modify-write access. Tested on the Intel Assabet/Neponset platform, which supports only 8-bit accesses, which was broken by the original commit. Fixes: b70661c70830 ("net: smc91x: use run-time configuration on all ARM machines") Signed-off-by: Russell King Tested-by: Robert Jarzmik Signed-off-by: David S. Miller diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c index c410d84..66070ac 100644 --- a/arch/arm/mach-pxa/idp.c +++ b/arch/arm/mach-pxa/idp.c @@ -83,7 +83,8 @@ static struct resource smc91x_resources[] = { }; static struct smc91x_platdata smc91x_platdata = { - .flags = SMC91X_USE_32BIT | SMC91X_USE_DMA | SMC91X_NOWAIT, + .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | + SMC91X_USE_DMA | SMC91X_NOWAIT, }; static struct platform_device smc91x_device = { diff --git a/arch/arm/mach-pxa/xcep.c b/arch/arm/mach-pxa/xcep.c index 3f06cd9..056369e 100644 --- a/arch/arm/mach-pxa/xcep.c +++ b/arch/arm/mach-pxa/xcep.c @@ -120,7 +120,8 @@ static struct resource smc91x_resources[] = { }; static struct smc91x_platdata xcep_smc91x_info = { - .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA, + .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | + SMC91X_NOWAIT | SMC91X_USE_DMA, }; static struct platform_device smc91x_device = { diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index baf1745..a0ead0a 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -93,7 +93,8 @@ static struct smsc911x_platform_config smsc911x_config = { }; static struct smc91x_platdata smc91x_platdata = { - .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT, + .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | + SMC91X_NOWAIT, }; static struct platform_device realview_eth_device = { diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c index 1525d7b..88149f8 100644 --- a/arch/arm/mach-sa1100/pleb.c +++ b/arch/arm/mach-sa1100/pleb.c @@ -45,7 +45,7 @@ static struct resource smc91x_resources[] = { }; static struct smc91x_platdata smc91x_platdata = { - .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT, + .flags = SMC91X_USE_16BIT | SMC91X_USE_8BIT | SMC91X_NOWAIT, }; static struct platform_device smc91x_device = { diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c index c6db52b..10c5777 100644 --- a/arch/blackfin/mach-bf561/boards/cm_bf561.c +++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c @@ -146,7 +146,8 @@ static struct platform_device hitachi_fb_device = { #include static struct smc91x_platdata smc91x_info = { - .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT, + .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | + SMC91X_NOWAIT, .leda = RPC_LED_100_10, .ledb = RPC_LED_TX_RX, }; diff --git a/arch/blackfin/mach-bf561/boards/ezkit.c b/arch/blackfin/mach-bf561/boards/ezkit.c index f35525b..57d1c43 100644 --- a/arch/blackfin/mach-bf561/boards/ezkit.c +++ b/arch/blackfin/mach-bf561/boards/ezkit.c @@ -134,7 +134,8 @@ static struct platform_device net2272_bfin_device = { #include static struct smc91x_platdata smc91x_info = { - .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT, + .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | + SMC91X_NOWAIT, .leda = RPC_LED_100_10, .ledb = RPC_LED_TX_RX, }; diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 726b80f..503a3b6 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2275,6 +2275,13 @@ static int smc_drv_probe(struct platform_device *pdev) if (pd) { memcpy(&lp->cfg, pd, sizeof(lp->cfg)); lp->io_shift = SMC91X_IO_SHIFT(lp->cfg.flags); + + if (!SMC_8BIT(lp) && !SMC_16BIT(lp)) { + dev_err(&pdev->dev, + "at least one of 8-bit or 16-bit access support is required.\n"); + ret = -ENXIO; + goto out_free_netdev; + } } #if IS_BUILTIN(CONFIG_OF) diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index 1a55c79..e17671c 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -37,6 +37,27 @@ #include /* + * Any 16-bit access is performed with two 8-bit accesses if the hardware + * can't do it directly. Most registers are 16-bit so those are mandatory. + */ +#define SMC_outw_b(x, a, r) \ + do { \ + unsigned int __val16 = (x); \ + unsigned int __reg = (r); \ + SMC_outb(__val16, a, __reg); \ + SMC_outb(__val16 >> 8, a, __reg + (1 << SMC_IO_SHIFT)); \ + } while (0) + +#define SMC_inw_b(a, r) \ + ({ \ + unsigned int __val16; \ + unsigned int __reg = r; \ + __val16 = SMC_inb(a, __reg); \ + __val16 |= SMC_inb(a, __reg + (1 << SMC_IO_SHIFT)) << 8; \ + __val16; \ + }) + +/* * Define your architecture specific bus configuration parameters here. */ @@ -55,10 +76,30 @@ #define SMC_IO_SHIFT (lp->io_shift) #define SMC_inb(a, r) readb((a) + (r)) -#define SMC_inw(a, r) readw((a) + (r)) +#define SMC_inw(a, r) \ + ({ \ + unsigned int __smc_r = r; \ + SMC_16BIT(lp) ? readw((a) + __smc_r) : \ + SMC_8BIT(lp) ? SMC_inw_b(a, __smc_r) : \ + ({ BUG(); 0; }); \ + }) + #define SMC_inl(a, r) readl((a) + (r)) #define SMC_outb(v, a, r) writeb(v, (a) + (r)) +#define SMC_outw(v, a, r) \ + do { \ + unsigned int __v = v, __smc_r = r; \ + if (SMC_16BIT(lp)) \ + __SMC_outw(__v, a, __smc_r); \ + else if (SMC_8BIT(lp)) \ + SMC_outw_b(__v, a, __smc_r); \ + else \ + BUG(); \ + } while (0) + #define SMC_outl(v, a, r) writel(v, (a) + (r)) +#define SMC_insb(a, r, p, l) readsb((a) + (r), p, l) +#define SMC_outsb(a, r, p, l) writesb((a) + (r), p, l) #define SMC_insw(a, r, p, l) readsw((a) + (r), p, l) #define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) #define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) @@ -66,7 +107,7 @@ #define SMC_IRQ_FLAGS (-1) /* from resource */ /* We actually can't write halfwords properly if not word aligned */ -static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg) +static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg) { if ((machine_is_mainstone() || machine_is_stargate2() || machine_is_pxa_idp()) && reg & 2) { @@ -416,24 +457,8 @@ smc_pxa_dma_insw(void __iomem *ioaddr, struct smc_local *lp, int reg, int dma, #if ! SMC_CAN_USE_16BIT -/* - * Any 16-bit access is performed with two 8-bit accesses if the hardware - * can't do it directly. Most registers are 16-bit so those are mandatory. - */ -#define SMC_outw(x, ioaddr, reg) \ - do { \ - unsigned int __val16 = (x); \ - SMC_outb( __val16, ioaddr, reg ); \ - SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\ - } while (0) -#define SMC_inw(ioaddr, reg) \ - ({ \ - unsigned int __val16; \ - __val16 = SMC_inb( ioaddr, reg ); \ - __val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \ - __val16; \ - }) - +#define SMC_outw(x, ioaddr, reg) SMC_outw_b(x, ioaddr, reg) +#define SMC_inw(ioaddr, reg) SMC_inw_b(ioaddr, reg) #define SMC_insw(a, r, p, l) BUG() #define SMC_outsw(a, r, p, l) BUG() diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h index 76199b7..e302c44 100644 --- a/include/linux/smc91x.h +++ b/include/linux/smc91x.h @@ -1,6 +1,16 @@ #ifndef __SMC91X_H__ #define __SMC91X_H__ +/* + * These bits define which access sizes a platform can support, rather + * than the maximal access size. So, if your platform can do 16-bit + * and 32-bit accesses to the SMC91x device, but not 8-bit, set both + * SMC91X_USE_16BIT and SMC91X_USE_32BIT. + * + * The SMC91x driver requires at least one of SMC91X_USE_8BIT or + * SMC91X_USE_16BIT to be supported - just setting SMC91X_USE_32BIT is + * an invalid configuration. + */ #define SMC91X_USE_8BIT (1 << 0) #define SMC91X_USE_16BIT (1 << 1) #define SMC91X_USE_32BIT (1 << 2) -- cgit v0.10.2 From b99b43bb4bdf1d361f7487cf03d803082bbf9101 Mon Sep 17 00:00:00 2001 From: Owen Lin Date: Fri, 26 Aug 2016 13:49:09 +0800 Subject: Add Killer E2500 device ID in alx driver. Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 6453148..4eb17da 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1545,6 +1545,8 @@ static const struct pci_device_id alx_pci_tbl[] = { .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2400), .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, + { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2500), + .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8162), .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8171) }, diff --git a/drivers/net/ethernet/atheros/alx/reg.h b/drivers/net/ethernet/atheros/alx/reg.h index 0959e68..1fc2d85 100644 --- a/drivers/net/ethernet/atheros/alx/reg.h +++ b/drivers/net/ethernet/atheros/alx/reg.h @@ -38,6 +38,7 @@ #define ALX_DEV_ID_AR8161 0x1091 #define ALX_DEV_ID_E2200 0xe091 #define ALX_DEV_ID_E2400 0xe0a1 +#define ALX_DEV_ID_E2500 0xe0b1 #define ALX_DEV_ID_AR8162 0x1090 #define ALX_DEV_ID_AR8171 0x10A1 #define ALX_DEV_ID_AR8172 0x10A0 -- cgit v0.10.2