From 936523441bb64cdc9a5b263e8fd2782e70313a57 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 6 Aug 2016 15:50:52 +0200 Subject: batman-adv: Add missing refcnt for last_candidate batadv_find_router dereferences last_bonding_candidate from orig_node without making sure that it has a valid reference. This reference has to be retrieved by increasing the reference counter while holding neigh_list_lock. The lock is required to avoid that batadv_last_bonding_replace removes the current last_bonding_candidate, reduces the reference counter and maybe destroys the object in this process. Fixes: f3b3d9018975 ("batman-adv: add bonding again") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 7602c00..3d19947 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -470,6 +470,29 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, } /** + * batadv_last_bonding_get - Get last_bonding_candidate of orig_node + * @orig_node: originator node whose last bonding candidate should be retrieved + * + * Return: last bonding candidate of router or NULL if not found + * + * The object is returned with refcounter increased by 1. + */ +static struct batadv_orig_ifinfo * +batadv_last_bonding_get(struct batadv_orig_node *orig_node) +{ + struct batadv_orig_ifinfo *last_bonding_candidate; + + spin_lock_bh(&orig_node->neigh_list_lock); + last_bonding_candidate = orig_node->last_bonding_candidate; + + if (last_bonding_candidate) + kref_get(&last_bonding_candidate->refcount); + spin_unlock_bh(&orig_node->neigh_list_lock); + + return last_bonding_candidate; +} + +/** * batadv_last_bonding_replace - Replace last_bonding_candidate of orig_node * @orig_node: originator node whose bonding candidates should be replaced * @new_candidate: new bonding candidate or NULL @@ -539,7 +562,7 @@ batadv_find_router(struct batadv_priv *bat_priv, * router - obviously there are no other candidates. */ rcu_read_lock(); - last_candidate = orig_node->last_bonding_candidate; + last_candidate = batadv_last_bonding_get(orig_node); if (last_candidate) last_cand_router = rcu_dereference(last_candidate->router); @@ -631,6 +654,9 @@ next: batadv_orig_ifinfo_put(next_candidate); } + if (last_candidate) + batadv_orig_ifinfo_put(last_candidate); + return router; } -- cgit v0.10.2 From 1e5d343b8f23770e8ac5d31f5c439826bdb35148 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 23 Aug 2016 03:13:03 +0200 Subject: batman-adv: fix elp packet data reservation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The skb_reserve() call only reserved headroom for the mac header, but not the elp packet header itself. Fixing this by using skb_put()'ing towards the skb tail instead of skb_push()'ing towards the skb head. Fixes: d6f94d91f766 ("batman-adv: ELP - adding basic infrastructure") Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 7d17001..ee08540 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -335,7 +335,7 @@ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface) goto out; skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN); - elp_buff = skb_push(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN); + elp_buff = skb_put(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN); elp_packet = (struct batadv_elp_packet *)elp_buff; memset(elp_packet, 0, BATADV_ELP_HLEN); -- cgit v0.10.2 From 5210d393ef84e5d2a4854671a9af2d97fd1b8dd4 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Fri, 2 Sep 2016 20:49:12 +0800 Subject: netfilter: nf_tables_trace: fix endiness when dump chain policy NFTA_TRACE_POLICY attribute is big endian, but we forget to call htonl to convert it. Fortunately, this attribute is parsed as big endian in libnftnl. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 39eb1cc..fa24a5b 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -237,7 +237,7 @@ void nft_trace_notify(struct nft_traceinfo *info) break; case NFT_TRACETYPE_POLICY: if (nla_put_be32(skb, NFTA_TRACE_POLICY, - info->basechain->policy)) + htonl(info->basechain->policy))) goto nla_put_failure; break; } -- cgit v0.10.2 From d1a6cba576fc7c43e476538fe5aa72fe04bd80e1 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 6 Sep 2016 22:31:02 +0800 Subject: netfilter: nft_chain_route: re-route before skb is queued to userspace Imagine such situation, user add the following nft rules, and queue the packets to userspace for further check: # ip rule add fwmark 0x0/0x1 lookup eth0 # ip rule add fwmark 0x1/0x1 lookup eth1 # nft add table filter # nft add chain filter output {type route hook output priority 0 \;} # nft add rule filter output mark set 0x1 # nft add rule filter output queue num 0 But after we reinject the skbuff, the packet will be sent via the wrong route, i.e. in this case, the packet will be routed via eth0 table, not eth1 table. Because we skip to do re-route when verdict is NF_QUEUE, even if the mark was changed. Acctually, we should not touch sk_buff if verdict is NF_DROP or NF_STOLEN, and when re-route fails, return NF_DROP with error code. This is consistent with the mangle table in iptables. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 2375b0a..30493be 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -31,6 +31,7 @@ static unsigned int nf_route_table_hook(void *priv, __be32 saddr, daddr; u_int8_t tos; const struct iphdr *iph; + int err; /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || @@ -46,15 +47,17 @@ static unsigned int nf_route_table_hook(void *priv, tos = iph->tos; ret = nft_do_chain(&pkt, priv); - if (ret != NF_DROP && ret != NF_QUEUE) { + if (ret != NF_DROP && ret != NF_STOLEN) { iph = ip_hdr(skb); if (iph->saddr != saddr || iph->daddr != daddr || skb->mark != mark || - iph->tos != tos) - if (ip_route_me_harder(state->net, skb, RTN_UNSPEC)) - ret = NF_DROP; + iph->tos != tos) { + err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); + } } return ret; } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 71d995f..2535223 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -31,6 +31,7 @@ static unsigned int nf_route_table_hook(void *priv, struct in6_addr saddr, daddr; u_int8_t hop_limit; u32 mark, flowlabel; + int err; /* malformed packet, drop it */ if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) @@ -46,13 +47,16 @@ static unsigned int nf_route_table_hook(void *priv, flowlabel = *((u32 *)ipv6_hdr(skb)); ret = nft_do_chain(&pkt, priv); - if (ret != NF_DROP && ret != NF_QUEUE && + if (ret != NF_DROP && ret != NF_STOLEN && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || - flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) - return ip6_route_me_harder(state->net, skb) == 0 ? ret : NF_DROP; + flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { + err = ip6_route_me_harder(state->net, skb); + if (err < 0) + ret = NF_DROP_ERR(err); + } return ret; } -- cgit v0.10.2 From 2f30ea5090cbc57ea573cdc66421264b3de3fb0a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 8 Sep 2016 18:09:57 +0200 Subject: xfrm_user: propagate sec ctx allocation errors When we fail to attach the security context in xfrm_state_construct() we'll return 0 as error value which, in turn, will wrongly claim success to userland when, in fact, we won't be adding / updating the XFRM state. This is a regression introduced by commit fd21150a0fe1 ("[XFRM] netlink: Inline attach_encap_tmpl(), attach_sec_ctx(), and attach_one_addr()"). Fix it by propagating the error returned by security_xfrm_state_alloc() in this case. Fixes: fd21150a0fe1 ("[XFRM] netlink: Inline attach_encap_tmpl()...") Signed-off-by: Mathias Krause Cc: Thomas Graf Signed-off-by: Steffen Klassert diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index cb65d91..0889209 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -581,9 +581,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if (err) goto error; - if (attrs[XFRMA_SEC_CTX] && - security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX]))) - goto error; + if (attrs[XFRMA_SEC_CTX]) { + err = security_xfrm_state_alloc(x, + nla_data(attrs[XFRMA_SEC_CTX])); + if (err) + goto error; + } if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, attrs[XFRMA_REPLAY_ESN_VAL]))) -- cgit v0.10.2 From 1fb81e09d487656aa23f2acb1232c7f56b4c2367 Mon Sep 17 00:00:00 2001 From: "thomas.zeitlhofer+lkml@ze-it.at" Date: Wed, 7 Sep 2016 20:40:38 +0200 Subject: vti: use right inner_mode for inbound inter address family policy checks In case of inter address family tunneling (IPv6 over vti4 or IPv4 over vti6), the inbound policy checks in vti_rcv_cb() and vti6_rcv_cb() are using the wrong address family. As a result, all inbound inter address family traffic is dropped. Use the xfrm_ip2inner_mode() helper, as done in xfrm_input() (i.e., also increment LINUX_MIB_XFRMINSTATEMODEERROR in case of error), to select the inner_mode that contains the right address family for the inbound policy checks. Signed-off-by: Thomas Zeitlhofer Signed-off-by: Steffen Klassert diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index cc701fa..5d7944f 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -88,6 +88,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) struct net_device *dev; struct pcpu_sw_netstats *tstats; struct xfrm_state *x; + struct xfrm_mode *inner_mode; struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4; u32 orig_mark = skb->mark; int ret; @@ -105,7 +106,19 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) } x = xfrm_input_state(skb); - family = x->inner_mode->afinfo->family; + + inner_mode = x->inner_mode; + + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) { + XFRM_INC_STATS(dev_net(skb->dev), + LINUX_MIB_XFRMINSTATEMODEERROR); + return -EINVAL; + } + } + + family = inner_mode->afinfo->family; skb->mark = be32_to_cpu(tunnel->parms.i_key); ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d90a11f..52a2f73 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -340,6 +340,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) struct net_device *dev; struct pcpu_sw_netstats *tstats; struct xfrm_state *x; + struct xfrm_mode *inner_mode; struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6; u32 orig_mark = skb->mark; int ret; @@ -357,7 +358,19 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) } x = xfrm_input_state(skb); - family = x->inner_mode->afinfo->family; + + inner_mode = x->inner_mode; + + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) { + XFRM_INC_STATS(dev_net(skb->dev), + LINUX_MIB_XFRMINSTATEMODEERROR); + return -EINVAL; + } + } + + family = inner_mode->afinfo->family; skb->mark = be32_to_cpu(t->parms.i_key); ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); -- cgit v0.10.2 From 83843c80dcf11a78995d167255b03072a1e49c2c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 28 Aug 2016 13:10:37 +0200 Subject: mac80211: fix tim recalculation after PS response Handle the case where the mac80211 intermediate queues are empty and the driver has buffered frames Fixes: ba8c3d6f16a1 ("mac80211: add an intermediate software queue implementation") Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 76b737d..aa58df8 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1616,7 +1616,6 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, sta_info_recalc_tim(sta); } else { - unsigned long tids = sta->txq_buffered_tids & driver_release_tids; int tid; /* @@ -1648,7 +1647,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); - if (!(tids & BIT(tid)) || txqi->tin.backlog_packets) + if (!(driver_release_tids & BIT(tid)) || + txqi->tin.backlog_packets) continue; sta_info_recalc_tim(sta); -- cgit v0.10.2 From df6ef5d8a87ace995d5c10a7bd684be05911a321 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 4 Sep 2016 18:00:59 +0200 Subject: mac80211: fix sequence number assignment for PS response frames When using intermediate queues, sequence number allocation is deferred until dequeue. This doesn't work for PS response frames, which bypass those queues. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5023966..cc8e955 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -796,6 +796,36 @@ static __le16 ieee80211_tx_next_seq(struct sta_info *sta, int tid) return ret; } +static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, + struct ieee80211_vif *vif, + struct ieee80211_sta *pubsta, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_txq *txq = NULL; + + if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) || + (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)) + return NULL; + + if (!ieee80211_is_data(hdr->frame_control)) + return NULL; + + if (pubsta) { + u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; + + txq = pubsta->txq[tid]; + } else if (vif) { + txq = vif->txq; + } + + if (!txq) + return NULL; + + return to_txq_info(txq); +} + static ieee80211_tx_result debug_noinline ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) { @@ -853,7 +883,8 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) tid = *qc & IEEE80211_QOS_CTL_TID_MASK; tx->sta->tx_stats.msdu[tid]++; - if (!tx->sta->sta.txq[0]) + if (!ieee80211_get_txq(tx->local, info->control.vif, &tx->sta->sta, + tx->skb)) hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); return TX_CONTINUE; @@ -1243,36 +1274,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, return TX_CONTINUE; } -static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, - struct ieee80211_vif *vif, - struct ieee80211_sta *pubsta, - struct sk_buff *skb) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_txq *txq = NULL; - - if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) || - (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)) - return NULL; - - if (!ieee80211_is_data(hdr->frame_control)) - return NULL; - - if (pubsta) { - u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - - txq = pubsta->txq[tid]; - } else if (vif) { - txq = vif->txq; - } - - if (!txq) - return NULL; - - return to_txq_info(txq); -} - static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb) { IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time(); @@ -3264,7 +3265,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { *ieee80211_get_qos_ctl(hdr) = tid; - if (!sta->sta.txq[0]) + if (!ieee80211_get_txq(local, &sdata->vif, &sta->sta, skb)) hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); } else { info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; -- cgit v0.10.2 From 5df20f2141eadb5430caaad20eceac61cfe0f139 Mon Sep 17 00:00:00 2001 From: "Pedersen, Thomas" Date: Tue, 6 Sep 2016 11:59:00 -0700 Subject: mac80211: make mpath path fixing more robust A fixed mpath was not quite being treated as such: 1) if a PERR frame was received, a fixed mpath was deactivated. 2) queued path discovery for fixed mpath was potentially being considered, changing mpath state. 3) other mpath flags were potentially being inherited when fixing the mpath. Just assign PATH_FIXED and SN_VALID. This solves several issues when fixing a mesh path in one direction. The reverse direction mpath should probably also be fixed, or root announcements at least be enabled. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 8f9c3bd..faccef9 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -746,6 +746,7 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, sta = next_hop_deref_protected(mpath); if (mpath->flags & MESH_PATH_ACTIVE && ether_addr_equal(ta, sta->sta.addr) && + !(mpath->flags & MESH_PATH_FIXED) && (!(mpath->flags & MESH_PATH_SN_VALID) || SN_GT(target_sn, mpath->sn) || target_sn == 0)) { mpath->flags &= ~MESH_PATH_ACTIVE; @@ -1012,7 +1013,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) goto enddiscovery; spin_lock_bh(&mpath->state_lock); - if (mpath->flags & MESH_PATH_DELETED) { + if (mpath->flags & (MESH_PATH_DELETED | MESH_PATH_FIXED)) { spin_unlock_bh(&mpath->state_lock); goto enddiscovery; } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 6db2ddf..f0e6175 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -826,7 +826,7 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop) mpath->metric = 0; mpath->hop_count = 0; mpath->exp_time = 0; - mpath->flags |= MESH_PATH_FIXED; + mpath->flags = MESH_PATH_FIXED | MESH_PATH_SN_VALID; mesh_path_activate(mpath); spin_unlock_bh(&mpath->state_lock); mesh_path_tx_pending(mpath); -- cgit v0.10.2 From ecfcdfec7e0cc64215a194044305f02a5a836e6d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 15:38:12 +0200 Subject: netfilter: nf_nat: handle NF_DROP from nfnetlink_parse_nat_setup() nf_nat_setup_info() returns NF_* verdicts, so convert them to error codes that is what ctnelink expects. This has passed overlook without having any impact since this nf_nat_setup_info() has always returned NF_ACCEPT so far. Since 870190a9ec90 ("netfilter: nat: convert nat bysrc hash to rhashtable"), this is problem. Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index de31818..19c081e 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -807,7 +807,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, if (err < 0) return err; - return nf_nat_setup_info(ct, &range, manip); + return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0; } #else static int -- cgit v0.10.2 From 4440a2ab3b9f40dddbe006331ef0659c76859296 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 13 Sep 2016 08:49:18 +0800 Subject: netfilter: synproxy: Check oom when adding synproxy and seqadj ct extensions When memory is exhausted, nfct_seqadj_ext_add may fail to add the synproxy and seqadj extensions. The function nf_ct_seqadj_init doesn't check if get valid seqadj pointer by the nfct_seqadj. Now drop the packet directly when fail to add seqadj extension to avoid dereference NULL pointer in nf_ct_seqadj_init from init_conntrack(). Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index 6793614..e693731 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -27,6 +27,20 @@ static inline struct nf_conn_synproxy *nfct_synproxy_ext_add(struct nf_conn *ct) #endif } +static inline bool nf_ct_add_synproxy(struct nf_conn *ct, + const struct nf_conn *tmpl) +{ + if (tmpl && nfct_synproxy(tmpl)) { + if (!nfct_seqadj_ext_add(ct)) + return false; + + if (!nfct_synproxy_ext_add(ct)) + return false; + } + + return true; +} + struct synproxy_stats { unsigned int syn_received; unsigned int cookie_invalid; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index dd2c43a..9934b0c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1035,9 +1035,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (IS_ERR(ct)) return (struct nf_conntrack_tuple_hash *)ct; - if (tmpl && nfct_synproxy(tmpl)) { - nfct_seqadj_ext_add(ct); - nfct_synproxy_ext_add(ct); + if (!nf_ct_add_synproxy(ct, tmpl)) { + nf_conntrack_free(ct); + return ERR_PTR(-ENOMEM); } timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 19c081e..ecee105 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -441,7 +441,8 @@ nf_nat_setup_info(struct nf_conn *ct, ct->status |= IPS_DST_NAT; if (nfct_help(ct)) - nfct_seqadj_ext_add(ct); + if (!nfct_seqadj_ext_add(ct)) + return NF_DROP; } if (maniptype == NF_NAT_MANIP_SRC) { -- cgit v0.10.2 From 3e1be7ad2d38c6bd6aeef96df9bd0a7822f4e51c Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 9 Sep 2016 22:43:12 +0800 Subject: bnx2: Reset device during driver initialization When system enters into kdump kernel because of kernel panic, it won't shutdown devices. On-flight DMA will continue transferring data until device driver initializes. All devices are supposed to reset during driver initialization. And this property is used to fix the kdump failure in system with intel iommu. Other systems with hardware iommu should be similar. Please check commit 091d42e ("iommu/vt-d: Copy translation tables from old kernel") and those commits around. But bnx2 driver doesn't reset device during driver initialization. The device resetting is deferred to net device up stage. This will cause hardware iommu handling failure on bnx2 device. And its resetting relies on firmware. So in this patch move the firmware requesting code to earlier bnx2_init_one(), then next call bnx2_reset_chip to reset device. Signed-off-by: Baoquan He Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 8fc3f3c..505ceaf 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6356,10 +6356,6 @@ bnx2_open(struct net_device *dev) struct bnx2 *bp = netdev_priv(dev); int rc; - rc = bnx2_request_firmware(bp); - if (rc < 0) - goto out; - netif_carrier_off(dev); bnx2_disable_int(bp); @@ -6428,7 +6424,6 @@ open_err: bnx2_free_irq(bp); bnx2_free_mem(bp); bnx2_del_napi(bp); - bnx2_release_firmware(bp); goto out; } @@ -8575,6 +8570,12 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); + rc = bnx2_request_firmware(bp); + if (rc < 0) + goto error; + + + bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET); memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN); dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | @@ -8607,6 +8608,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; error: + bnx2_release_firmware(bp); pci_iounmap(pdev, bp->regview); pci_release_regions(pdev); pci_disable_device(pdev); -- cgit v0.10.2 From 715f5552b1e90ba3eecf6d1a6d044d0d5226663f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 10 Sep 2016 23:11:23 +0800 Subject: sctp: hold the transport before using it in sctp_hash_cmp Since commit 4f0087812648 ("sctp: apply rhashtable api to send/recv path"), sctp uses transport rhashtable with .obj_cmpfn sctp_hash_cmp, in which it compares the members of the transport with the rhashtable args to check if it's the right transport. But sctp uses the transport without holding it in sctp_hash_cmp, it can cause a use-after-free panic. As after it gets transport from hashtable, another CPU may close the sk and free the asoc. In sctp_association_free, it frees all the transports, meanwhile, the assoc's refcnt may be reduced to 0, assoc can be destroyed by sctp_association_destroy. So after that, transport->assoc is actually an unavailable memory address in sctp_hash_cmp. Although sctp_hash_cmp is under rcu_read_lock, it still can not avoid this, as assoc is not freed by RCU. This patch is to hold the transport before checking it's members with sctp_transport_hold, in which it checks the refcnt first, holds it if it's not 0. Fixes: 4f0087812648 ("sctp: apply rhashtable api to send/recv path") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/net/sctp/input.c b/net/sctp/input.c index 69444d3..1555fb8 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -796,27 +796,34 @@ struct sctp_hash_cmp_arg { static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { + struct sctp_transport *t = (struct sctp_transport *)ptr; const struct sctp_hash_cmp_arg *x = arg->key; - const struct sctp_transport *t = ptr; - struct sctp_association *asoc = t->asoc; - const struct net *net = x->net; + struct sctp_association *asoc; + int err = 1; if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr)) - return 1; - if (!net_eq(sock_net(asoc->base.sk), net)) - return 1; + return err; + if (!sctp_transport_hold(t)) + return err; + + asoc = t->asoc; + if (!net_eq(sock_net(asoc->base.sk), x->net)) + goto out; if (x->ep) { if (x->ep != asoc->ep) - return 1; + goto out; } else { if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port)) - return 1; + goto out; if (!sctp_bind_addr_match(&asoc->base.bind_addr, x->laddr, sctp_sk(asoc->base.sk))) - return 1; + goto out; } - return 0; + err = 0; +out: + sctp_transport_put(t); + return err; } static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed) -- cgit v0.10.2 From 440f895aa97f81a2bdc02993da5360a1f6da2fb5 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Sun, 11 Sep 2016 21:43:34 +0200 Subject: drivers: net: phy: xgene: Fix 'remove' function If 'IS_ERR(pdata->clk)' is true, then 'clk_disable_unprepare(pdata->clk)' will do nothing. It is likely that 'if (!IS_ERR(pdata->clk))' was expected here. In fact, the test can even be removed because 'clk_disable_unprepare' already handles such cases. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/phy/mdio-xgene.c index 7756748..92af182 100644 --- a/drivers/net/phy/mdio-xgene.c +++ b/drivers/net/phy/mdio-xgene.c @@ -424,10 +424,8 @@ static int xgene_mdio_remove(struct platform_device *pdev) mdiobus_unregister(mdio_bus); mdiobus_free(mdio_bus); - if (dev->of_node) { - if (IS_ERR(pdata->clk)) - clk_disable_unprepare(pdata->clk); - } + if (dev->of_node) + clk_disable_unprepare(pdata->clk); return 0; } -- cgit v0.10.2 From ad5987b47e96a0fb6d13fea250e936aed000093c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 15:53:55 +0200 Subject: nl80211: validate number of probe response CSA counters Due to an apparent copy/paste bug, the number of counters for the beacon configuration were checked twice, instead of checking the number of probe response counters. Fix this to check the number of probe response counters before parsing those. Cc: stable@vger.kernel.org Fixes: 9a774c78e211 ("cfg80211: Support multiple CSA counters") Signed-off-by: Johannes Berg diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f02653a..4809f4d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6978,7 +6978,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) params.n_counter_offsets_presp = len / sizeof(u16); if (rdev->wiphy.max_num_csa_counters && - (params.n_counter_offsets_beacon > + (params.n_counter_offsets_presp > rdev->wiphy.max_num_csa_counters)) return -EINVAL; -- cgit v0.10.2 From 0b97a484e52cb423662eb98904aad82dafcc1f10 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 09:41:34 +0200 Subject: mac80211: check skb_linearize() return value The A-MSDU TX code (within TXQs) didn't always check the return value of skb_linearize() properly, resulting in potentially passing a frag- list SKB down to the driver even when it said it can't handle it. Fix that. Fixes: 6e0456b545456 ("mac80211: add A-MSDU tx support") Signed-off-by: Johannes Berg diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index cc8e955..18b285e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1515,8 +1515,12 @@ out: spin_unlock_bh(&fq->lock); if (skb && skb_has_frag_list(skb) && - !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) - skb_linearize(skb); + !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) { + if (skb_linearize(skb)) { + ieee80211_free_txskb(&local->hw, skb); + return NULL; + } + } return skb; } -- cgit v0.10.2 From 85d5313ed717ad60769491c7c072d23bc0a68e7a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 11:38:31 +0200 Subject: mac80211: reject TSPEC TIDs (TSIDs) for aggregation Since mac80211 doesn't currently support TSIDs 8-15 which can only be used after QoS TSPEC negotiation (and not even after WMM negotiation), reject attempts to set up aggregation sessions for them, which might confuse drivers. In mac80211 we do correctly handle that, but the TSIDs should never get used anyway, and drivers might not be able to handle it. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index a9aff60..afa9468 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -261,10 +261,16 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, .timeout = timeout, .ssn = start_seq_num, }; - int i, ret = -EOPNOTSUPP; u16 status = WLAN_STATUS_REQUEST_DECLINED; + if (tid >= IEEE80211_FIRST_TSPEC_TSID) { + ht_dbg(sta->sdata, + "STA %pM requests BA session on unsupported tid %d\n", + sta->sta.addr, tid); + goto end_no_lock; + } + if (!sta->sta.ht_cap.ht_supported) { ht_dbg(sta->sdata, "STA %pM erroneously requests BA session on tid %d w/o QoS\n", diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 5650c46..45319cc 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -584,6 +584,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) return -EINVAL; + if (WARN_ON(tid >= IEEE80211_FIRST_TSPEC_TSID)) + return -EINVAL; + ht_dbg(sdata, "Open BA session requested for %pM tid %u\n", pubsta->addr, tid); -- cgit v0.10.2 From 54c5ef2e93ea002dc5dd63349298b2778fe59edb Mon Sep 17 00:00:00 2001 From: Beni Lev Date: Wed, 10 Aug 2016 17:03:43 +0300 Subject: iwlwifi: mvm: update TX queue before making a copy of the skb Off-channel action frames (such as ANQP frames) must be sent either on the AUX queue or on the offchannel queue, otherwise the firmware will cause a SYSASSERT. In the current implementation, the queue to be used is correctly set in the original skb, but this is done after it is copied. Thus the copy remains with the original, incorrect queue. Fix this by setting the queue in the original skb before copying it. Fixes: commit 5c08b0f5026f ("iwlwifi: mvm: don't override the rate with the AMSDU len") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Beni Lev Signed-off-by: Luca Coelho diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index c6585ab..b3a87a3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -513,6 +513,15 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) int hdrlen = ieee80211_hdrlen(hdr->frame_control); int queue; + /* IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used + * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel + * queue. STATION (HS2.0) uses the auxiliary context of the FW, + * and hence needs to be sent on the aux queue + */ + if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE && + skb_info->control.vif->type == NL80211_IFTYPE_STATION) + IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue; + memcpy(&info, skb->cb, sizeof(info)); if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_AMPDU)) @@ -526,16 +535,6 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) /* This holds the amsdu headers length */ skb_info->driver_data[0] = (void *)(uintptr_t)0; - /* - * IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used - * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel - * queue. STATION (HS2.0) uses the auxiliary context of the FW, - * and hence needs to be sent on the aux queue - */ - if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE && - info.control.vif->type == NL80211_IFTYPE_STATION) - IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue; - queue = info.hw_queue; /* -- cgit v0.10.2 From 2a292822f00f7409fc0bd6b2d09efc5b8e6c9c5d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 14 Sep 2016 13:09:24 +0200 Subject: net/mlx4_en: fix off by one in error handling If an error occurs in mlx4_init_eq_table the index used in the err_out_unmap label is one too big which results in a panic in mlx4_free_eq. This patch fixes the index in the error path. Signed-off-by: Sebastian Ott Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index f613977..cf8f8a7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1305,8 +1305,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) return 0; err_out_unmap: - while (i >= 0) - mlx4_free_eq(dev, &priv->eq_table.eq[i--]); + while (i > 0) + mlx4_free_eq(dev, &priv->eq_table.eq[--i]); #ifdef CONFIG_RFS_ACCEL for (i = 1; i <= dev->caps.num_ports; i++) { if (mlx4_priv(dev)->port[i].rmap) { -- cgit v0.10.2 From 7077dc415b113ac17a6696c432bad2d66574e4fb Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 21:29:34 +0800 Subject: net: ethernet: mediatek: fix module loading automatically based on MODULE_DEVICE_TABLE The device table is required to load modules based on modaliases. After adding MODULE_DEVICE_TABLE, below entries for example will be added to modules.alias: alias of:N*T*Cmediatek,mt7623-ethC* mtk_eth_soc Signed-off-by: Sean Wang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index d919915..3743af8 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1923,6 +1923,7 @@ const struct of_device_id of_mtk_match[] = { { .compatible = "mediatek,mt7623-eth" }, {}, }; +MODULE_DEVICE_TABLE(of, of_mtk_match); static struct platform_driver mtk_driver = { .probe = mtk_probe, -- cgit v0.10.2 From 01afd972a737879c1466a12f696601a2ce91ea84 Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Wed, 14 Sep 2016 19:06:44 +0300 Subject: net/ibm/emac: add set mac addr callback add realization for mac address set and remove dummy callback. Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 4c9771d..2dfc603 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -980,6 +980,33 @@ static void emac_set_multicast_list(struct net_device *ndev) __emac_set_multicast_list(dev); } +static int emac_set_mac_address(struct net_device *ndev, void *sa) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct sockaddr *addr = sa; + struct emac_regs __iomem *p = dev->emacp; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + mutex_lock(&dev->link_lock); + + memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len); + + emac_rx_disable(dev); + emac_tx_disable(dev); + out_be32(&p->iahr, (ndev->dev_addr[0] << 8) | ndev->dev_addr[1]); + out_be32(&p->ialr, (ndev->dev_addr[2] << 24) | + (ndev->dev_addr[3] << 16) | (ndev->dev_addr[4] << 8) | + ndev->dev_addr[5]); + emac_tx_enable(dev); + emac_rx_enable(dev); + + mutex_unlock(&dev->link_lock); + + return 0; +} + static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu) { int rx_sync_size = emac_rx_sync_size(new_mtu); @@ -2686,7 +2713,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_do_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = emac_set_mac_address, .ndo_start_xmit = emac_start_xmit, .ndo_change_mtu = eth_change_mtu, }; @@ -2699,7 +2726,7 @@ static const struct net_device_ops emac_gige_netdev_ops = { .ndo_do_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = emac_set_mac_address, .ndo_start_xmit = emac_start_xmit_sg, .ndo_change_mtu = emac_change_mtu, }; -- cgit v0.10.2 From 7106a069f45b15e63d14484e72969e64798e641c Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Wed, 14 Sep 2016 19:06:45 +0300 Subject: net/ibm/emac: add mutex to 'set multicast list' for preventing race conditions within ioctl calls. Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 2dfc603..7af09cb 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -977,7 +977,10 @@ static void emac_set_multicast_list(struct net_device *ndev) dev->mcast_pending = 1; return; } + + mutex_lock(&dev->link_lock); __emac_set_multicast_list(dev); + mutex_unlock(&dev->link_lock); } static int emac_set_mac_address(struct net_device *ndev, void *sa) -- cgit v0.10.2 From d6f64d725bac20df66b2eacd847fc41d7a1905e0 Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Thu, 15 Sep 2016 11:40:05 +1200 Subject: net: VRF: Pass original iif to ip_route_input() The function ip_rcv_finish() calls l3mdev_ip_rcv(). On any VRF except the global VRF, this replaces skb->dev with the VRF master interface. When calling ip_route_input_noref() from here, the checks for forwarding look at this master device instead of the initial ingress interface. This will allow packets to be routed which normally would be dropped. For example, an interface that is not assigned an IP address should drop packets, but because the checking is against the master device, the packet will be forwarded. The fix here is to still call l3mdev_ip_rcv(), but remember the initial net_device. This is passed to the other functions within ip_rcv_finish, so they still see the original interface. Signed-off-by: Mark Tomlinson Acked-by: David Ahern Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 4b351af..d6feabb 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -312,6 +312,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; + struct net_device *dev = skb->dev; /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -341,7 +342,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) */ if (!skb_valid_dst(skb)) { int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); + iph->tos, dev); if (unlikely(err)) { if (err == -EXDEV) __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); @@ -370,7 +371,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len); } else if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) { - struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); /* RFC 1122 3.3.6: * -- cgit v0.10.2 From bc6c03fa3cacd31b873e36ca16ef9678269deae6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 15 Sep 2016 03:45:07 +0000 Subject: nfp: fix error return code in nfp_net_netdev_open() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 73725d9dfd99 ("nfp: allocate ring SW structs dynamically") Signed-off-by: Wei Yongjun Acked-by: Jakub Kicinski Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 252e492..39dadfc 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2044,12 +2044,16 @@ static int nfp_net_netdev_open(struct net_device *netdev) nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings), GFP_KERNEL); - if (!nn->rx_rings) + if (!nn->rx_rings) { + err = -ENOMEM; goto err_free_lsc; + } nn->tx_rings = kcalloc(nn->num_tx_rings, sizeof(*nn->tx_rings), GFP_KERNEL); - if (!nn->tx_rings) + if (!nn->tx_rings) { + err = -ENOMEM; goto err_free_rx_rings; + } for (r = 0; r < nn->num_r_vecs; r++) { err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r); -- cgit v0.10.2 From e830baa9c3f0023769ba9aab19eb44c892769d87 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 15 Sep 2016 14:39:21 +0200 Subject: qeth: restore device features after recovery After device recovery, only a basic set of network device features is enabled on the device. If features like checksum offloading or TSO were enabled by the user before the recovery, this results in a mismatch between the network device features, that the kernel assumes to be enabled on the device, and the features actually enabled on the device. This patch tries to restore previously set features, that require changes on the device, after the recovery of a device. In case of an error, the network device's features are changed to contain only the features that are actually turned on. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index bf40063..6d4b68c4 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -999,6 +999,7 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *, __u16, __u16, enum qeth_prot_versions); int qeth_set_features(struct net_device *, netdev_features_t); +int qeth_recover_features(struct net_device *); netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); /* exports for OSN */ diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 7dba6c8..6ad5a14 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6131,6 +6131,35 @@ static int qeth_set_ipa_tso(struct qeth_card *card, int on) return rc; } +/* try to restore device features on a device after recovery */ +int qeth_recover_features(struct net_device *dev) +{ + struct qeth_card *card = dev->ml_priv; + netdev_features_t recover = dev->features; + + if (recover & NETIF_F_IP_CSUM) { + if (qeth_set_ipa_csum(card, 1, IPA_OUTBOUND_CHECKSUM)) + recover ^= NETIF_F_IP_CSUM; + } + if (recover & NETIF_F_RXCSUM) { + if (qeth_set_ipa_csum(card, 1, IPA_INBOUND_CHECKSUM)) + recover ^= NETIF_F_RXCSUM; + } + if (recover & NETIF_F_TSO) { + if (qeth_set_ipa_tso(card, 1)) + recover ^= NETIF_F_TSO; + } + + if (recover == dev->features) + return 0; + + dev_warn(&card->gdev->dev, + "Device recovery failed to restore all offload features\n"); + dev->features = recover; + return -EIO; +} +EXPORT_SYMBOL_GPL(qeth_recover_features); + int qeth_set_features(struct net_device *dev, netdev_features_t features) { struct qeth_card *card = dev->ml_priv; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 7bc20c5..54fd891 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1246,6 +1246,9 @@ contin: } /* this also sets saved unicast addresses */ qeth_l2_set_rx_mode(card->dev); + rtnl_lock(); + qeth_recover_features(card->dev); + rtnl_unlock(); } /* let user_space know that device is online */ kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 7293466..2f51271 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3269,6 +3269,7 @@ contin: else dev_open(card->dev); qeth_l3_set_multicast_list(card->dev); + qeth_recover_features(card->dev); rtnl_unlock(); } qeth_trace_features(card); -- cgit v0.10.2 From 016930b88a1d6eb6e6b3287d593e13ca06986acc Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:22 +0200 Subject: s390/qeth: use ip_lock for hsuid configuration qeth_l3_dev_hsuid_store() changes the ip hash table, which requires the ip_lock. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index 65645b1..0e00a5c 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -297,7 +297,9 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev, addr->u.a6.pfxlen = 0; addr->type = QETH_IP_TYPE_NORMAL; + spin_lock_bh(&card->ip_lock); qeth_l3_delete_ip(card, addr); + spin_unlock_bh(&card->ip_lock); kfree(addr); } @@ -329,7 +331,10 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev, addr->type = QETH_IP_TYPE_NORMAL; } else return -ENOMEM; + + spin_lock_bh(&card->ip_lock); qeth_l3_add_ip(card, addr); + spin_unlock_bh(&card->ip_lock); kfree(addr); return count; -- cgit v0.10.2 From a7531c1cc09855df5e33ceefe4fdfc2d74ccab19 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:23 +0200 Subject: s390/qeth: allow hsuid configuration in DOWN state The qeth IP address mapping logic has been reworked recently. It causes now problems to specify qeth sysfs attribute "hsuid" in DOWN state, which is allowed. Postpone registering or deregistering of IP-addresses in this case. Signed-off-by: Ursula Braun Reviewed-by: Thomas Richter Signed-off-by: David S. Miller diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 2f51271..4ba82e1 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -257,6 +257,11 @@ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) if (addr->in_progress) return -EINPROGRESS; + if (!qeth_card_hw_is_reachable(card)) { + addr->disp_flag = QETH_DISP_ADDR_DELETE; + return 0; + } + rc = qeth_l3_deregister_addr_entry(card, addr); hash_del(&addr->hnode); @@ -296,6 +301,11 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) hash_add(card->ip_htable, &addr->hnode, qeth_l3_ipaddr_hash(addr)); + if (!qeth_card_hw_is_reachable(card)) { + addr->disp_flag = QETH_DISP_ADDR_ADD; + return 0; + } + /* qeth_l3_register_addr_entry can go to sleep * if we add a IPV4 addr. It is caused by the reason * that SETIP ipa cmd starts ARP staff for IPV4 addr. @@ -390,12 +400,16 @@ static void qeth_l3_recover_ip(struct qeth_card *card) int i; int rc; - QETH_CARD_TEXT(card, 4, "recoverip"); + QETH_CARD_TEXT(card, 4, "recovrip"); spin_lock_bh(&card->ip_lock); hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) { - if (addr->disp_flag == QETH_DISP_ADDR_ADD) { + if (addr->disp_flag == QETH_DISP_ADDR_DELETE) { + qeth_l3_deregister_addr_entry(card, addr); + hash_del(&addr->hnode); + kfree(addr); + } else if (addr->disp_flag == QETH_DISP_ADDR_ADD) { if (addr->proto == QETH_PROT_IPV4) { addr->in_progress = 1; spin_unlock_bh(&card->ip_lock); @@ -407,10 +421,8 @@ static void qeth_l3_recover_ip(struct qeth_card *card) if (!rc) { addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; - if (addr->ref_counter < 1) { + if (addr->ref_counter < 1) qeth_l3_delete_ip(card, addr); - kfree(addr); - } } else { hash_del(&addr->hnode); kfree(addr); -- cgit v0.10.2 From 903e48531e8b5d414c8f1960eacac24c31f60344 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:24 +0200 Subject: qeth: check not more than 16 SBALEs on the completion queue af_iucv socket programs with HiperSockets as transport make use of the qdio completion queue. Running such an af_iucv socket program may result in a crash: [90341.677709] Oops: 0038 ilc:2 [#1] SMP [90341.677743] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.6.0-20160720.0.0e86ec7.5e62689.fc23.s390xperformance #1 [90341.677744] Hardware name: IBM 2964 N96 703 (LPAR) [90341.677746] task: 00000000edb79f00 ti: 00000000edb84000 task.ti: 00000000edb84000 [90341.677748] Krnl PSW : 0704d00180000000 000000000075bc50 (qeth_qdio_input_handler+0x258/0x4e0) [90341.677756] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 Krnl GPRS: 000003d10391e900 0000000000000001 00000000e61e6000 0000000000000005 [90341.677759] 0000000000a9e6ec 5420040001a77400 0000000000000001 000000000000006f [90341.677761] 00000000e0d83f00 0000000000000003 0000000000000010 5420040001a77400 [90341.677784] 000000007ba8b000 0000000000943fd0 000000000075bc4e 00000000ed3b3c10 [90341.677793] Krnl Code: 000000000075bc42: e320cc180004 lg %r2,3096(%r12) 000000000075bc48: c0e5ffffc5cc brasl %r14,7547e0 #000000000075bc4e: 1816 lr %r1,%r6 >000000000075bc50: ba19b008 cs %r1,%r9,8(%r11) 000000000075bc54: ec180041017e cij %r1,1,8,75bcd6 000000000075bc5a: 5810b008 l %r1,8(%r11) 000000000075bc5e: ec16005c027e cij %r1,2,6,75bd16 000000000075bc64: 5090b008 st %r9,8(%r11) [90341.677807] Call Trace: [90341.677810] ([<000000000075bbc0>] qeth_qdio_input_handler+0x1c8/0x4e0) [90341.677812] ([<000000000070efbc>] qdio_kick_handler+0x124/0x2a8) [90341.677814] ([<0000000000713570>] __tiqdio_inbound_processing+0xf0/0xcd0) [90341.677818] ([<0000000000143312>] tasklet_action+0x92/0x120) [90341.677823] ([<00000000008b6e72>] __do_softirq+0x112/0x308) [90341.677824] ([<0000000000142bce>] irq_exit+0xd6/0xf8) [90341.677829] ([<000000000010b1d2>] do_IRQ+0x6a/0x88) [90341.677830] ([<00000000008b6322>] io_int_handler+0x112/0x220) [90341.677832] ([<0000000000102b2e>] enabled_wait+0x56/0xa8) [90341.677833] ([<0000000000000000>] (null)) [90341.677835] ([<0000000000102e32>] arch_cpu_idle+0x32/0x48) [90341.677838] ([<000000000018a126>] cpu_startup_entry+0x266/0x2b0) [90341.677841] ([<0000000000113b38>] smp_start_secondary+0x100/0x110) [90341.677843] ([<00000000008b68a6>] restart_int_handler+0x62/0x78) [90341.677845] ([<00000000008b6588>] psw_idle+0x3c/0x40) [90341.677846] Last Breaking-Event-Address: [90341.677848] [<00000000007547ec>] qeth_dbf_longtext+0xc/0xc0 [90341.677849] [90341.677850] Kernel panic - not syncing: Fatal exception in interrupt qeth_qdio_cq_handler() analyzes SBALs on this completion queue, but does not observe the limit of 16 SBAL elements per SBAL. This patch adds the additional check to process not more than 16 SBAL elements. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 6ad5a14..20cf296 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3619,7 +3619,8 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, int e; e = 0; - while (buffer->element[e].addr) { + while ((e < QDIO_MAX_ELEMENTS_PER_BUFFER) && + buffer->element[e].addr) { unsigned long phys_aob_addr; phys_aob_addr = (unsigned long) buffer->element[e].addr; -- cgit v0.10.2 From 243f750fc6f5d8e4dec984a9a785941c67452b8f Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:25 +0200 Subject: qeth: do not limit number of gso segments To reduce the need of skb_linearize() calls, gso_max_segs of qeth net_devices had been limited according to the maximum number of qdio SBAL elements. But a gso segment cannot be larger than the mtu-size, while an SBAL element can contain up to 4096 bytes. The gso_max_segs limitation limits the maximum packet size given to the qeth driver. Performance measurements with tso-enabled qeth network interfaces and mtu-size 1500 showed, that the disadvantage of smaller packets is much more severe than the advantage of fewer skb_linearize() calls. This patch gets rid of the gso_max_segs limitations in the qeth driver. Signed-off-by: Ursula Braun Reviewed-by: Thomas Richter Signed-off-by: David S. Miller diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 54fd891..2081c18 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1131,7 +1131,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) qeth_l2_request_initial_mac(card); card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * PAGE_SIZE; - card->dev->gso_max_segs = (QETH_MAX_BUFFER_ELEMENTS(card) - 1); SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_l2_poll, QETH_NAPI_WEIGHT); netif_carrier_off(card->dev); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 4ba82e1..0cbbc80 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3148,7 +3148,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) netif_keep_dst(card->dev); card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * PAGE_SIZE; - card->dev->gso_max_segs = (QETH_MAX_BUFFER_ELEMENTS(card) - 1); SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_l3_poll, QETH_NAPI_WEIGHT); -- cgit v0.10.2 From 5722963a8e83309dad831cf6968c4c805aa342c0 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:26 +0200 Subject: qeth: do not turn on SG per default According to recent performance measurements, turning on net_device feature NETIF_F_SG only behaves well, but turning on feature NETIF_F_GSO shows bad results. Since the kernel activates NETIF_F_GSO automatically as soon as the driver configures feature NETIF_F_SG, qeth should not activate feature NETIF_F_SG per default, until the qeth problems with NETIF_F_GSO are solved. Signed-off-by: Ursula Braun Reviewed-by: Thomas Richter Signed-off-by: David S. Miller diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2081c18..bb27058 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1124,8 +1124,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) card->dev->hw_features |= NETIF_F_RXCSUM; card->dev->vlan_features |= NETIF_F_RXCSUM; } - /* Turn on SG per default */ - card->dev->features |= NETIF_F_SG; } card->info.broadcast_capable = 1; qeth_l2_request_initial_mac(card); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 0cbbc80..c00f6db 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3120,7 +3120,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) card->dev->vlan_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO; - card->dev->features = NETIF_F_SG; } } } else if (card->info.type == QETH_CARD_TYPE_IQD) { -- cgit v0.10.2 From 732a59cb6e7faed7a40da6665a517945c95fc895 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 15 Sep 2016 14:39:27 +0200 Subject: s390/qeth: fix setting VIPA address commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") restructured the internal address handling. This work broke setting a virtual IP address. The command echo 10.1.1.1 > /sys/bus/ccwgroup/devices//vipa/add4 fails with file exist error even if the IP address has not been set before. It turned out that the search result for the IP address search is handled incorrectly in the VIPA case. This patch fixes the setting of an virtual IP address. Signed-off-by: Thomas Richter Signed-off-by: Ursula Braun Signed-off-by: David S. Miller diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index c00f6db..272d9e7 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -701,7 +701,7 @@ int qeth_l3_add_vipa(struct qeth_card *card, enum qeth_prot_versions proto, spin_lock_bh(&card->ip_lock); - if (!qeth_l3_ip_from_hash(card, ipaddr)) + if (qeth_l3_ip_from_hash(card, ipaddr)) rc = -EEXIST; else qeth_l3_add_ip(card, ipaddr); @@ -769,7 +769,7 @@ int qeth_l3_add_rxip(struct qeth_card *card, enum qeth_prot_versions proto, spin_lock_bh(&card->ip_lock); - if (!qeth_l3_ip_from_hash(card, ipaddr)) + if (qeth_l3_ip_from_hash(card, ipaddr)) rc = -EEXIST; else qeth_l3_add_ip(card, ipaddr); -- cgit v0.10.2 From cce94483e47e8e3d74cf4475dea33f9fd4b6ad9f Mon Sep 17 00:00:00 2001 From: Filipe Manco Date: Thu, 15 Sep 2016 17:10:46 +0200 Subject: xen-netback: fix error handling on netback_probe() In case of error during netback_probe() (e.g. an entry missing on the xenstore) netback_remove() is called on the new device, which will set the device backend state to XenbusStateClosed by calling set_backend_state(). However, the backend state wasn't initialized by netback_probe() at this point, which will cause and invalid transaction and set_backend_state() to BUG(). Initialize the backend state at the beginning of netback_probe() to XenbusStateInitialising, and create two new valid state transitions on set_backend_state(), from XenbusStateInitialising to XenbusStateClosed, and from XenbusStateInitialising to XenbusStateInitWait. Signed-off-by: Filipe Manco Acked-by: Wei Liu Signed-off-by: David S. Miller diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 6a31f26..daf4c78 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -271,6 +271,11 @@ static int netback_probe(struct xenbus_device *dev, be->dev = dev; dev_set_drvdata(&dev->dev, be); + be->state = XenbusStateInitialising; + err = xenbus_switch_state(dev, XenbusStateInitialising); + if (err) + goto fail; + sg = 1; do { @@ -383,11 +388,6 @@ static int netback_probe(struct xenbus_device *dev, be->hotplug_script = script; - err = xenbus_switch_state(dev, XenbusStateInitWait); - if (err) - goto fail; - - be->state = XenbusStateInitWait; /* This kicks hotplug scripts, so do it immediately. */ err = backend_create_xenvif(be); @@ -492,20 +492,20 @@ static inline void backend_switch_state(struct backend_info *be, /* Handle backend state transitions: * - * The backend state starts in InitWait and the following transitions are + * The backend state starts in Initialising and the following transitions are * allowed. * - * InitWait -> Connected - * - * ^ \ | - * | \ | - * | \ | - * | \ | - * | \ | - * | \ | - * | V V + * Initialising -> InitWait -> Connected + * \ + * \ ^ \ | + * \ | \ | + * \ | \ | + * \ | \ | + * \ | \ | + * \ | \ | + * V | V V * - * Closed <-> Closing + * Closed <-> Closing * * The state argument specifies the eventual state of the backend and the * function transitions to that state via the shortest path. @@ -515,6 +515,20 @@ static void set_backend_state(struct backend_info *be, { while (be->state != state) { switch (be->state) { + case XenbusStateInitialising: + switch (state) { + case XenbusStateInitWait: + case XenbusStateConnected: + case XenbusStateClosing: + backend_switch_state(be, XenbusStateInitWait); + break; + case XenbusStateClosed: + backend_switch_state(be, XenbusStateClosed); + break; + default: + BUG(); + } + break; case XenbusStateClosed: switch (state) { case XenbusStateInitWait: -- cgit v0.10.2 From ffb4d6c8508657824bcef68a36b2a0f9d8c09d10 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Sep 2016 08:12:33 -0700 Subject: tcp: fix overflow in __tcp_retransmit_skb() If a TCP socket gets a large write queue, an overflow can happen in a test in __tcp_retransmit_skb() preventing all retransmits. The flow then stalls and resets after timeouts. Tested: sysctl -w net.core.wmem_max=1000000000 netperf -H dest -- -s 1000000000 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bdaef7f..f53d0cc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2605,7 +2605,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) * copying overhead: fragmentation, tunneling, mangling etc. */ if (atomic_read(&sk->sk_wmem_alloc) > - min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) + min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), + sk->sk_sndbuf)) return -EAGAIN; if (skb_still_in_host_queue(sk, skb)) -- cgit v0.10.2 From 20c64d5cd5a2bdcdc8982a06cb05e5e1bd851a3d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Sep 2016 08:48:46 -0700 Subject: net: avoid sk_forward_alloc overflows A malicious TCP receiver, sending SACK, can force the sender to split skbs in write queue and increase its memory usage. Then, when socket is closed and its write queue purged, we might overflow sk_forward_alloc (It becomes negative) sk_mem_reclaim() does nothing in this case, and more than 2GB are leaked from TCP perspective (tcp_memory_allocated is not changed) Then warnings trigger from inet_sock_destruct() and sk_stream_kill_queues() seeing a not zero sk_forward_alloc All TCP stack can be stuck because TCP is under memory pressure. A simple fix is to preemptively reclaim from sk_mem_uncharge(). This makes sure a socket wont have more than 2 MB forward allocated, after burst and idle period. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/include/net/sock.h b/include/net/sock.h index ff5be7e..8741988 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1332,6 +1332,16 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) if (!sk_has_account(sk)) return; sk->sk_forward_alloc += size; + + /* Avoid a possible overflow. + * TCP send queues can make this happen, if sk_mem_reclaim() + * is not called and more than 2 GBytes are released at once. + * + * If we reach 2 MBytes, reclaim 1 MBytes right now, there is + * no need to hold that much forward allocation anyway. + */ + if (unlikely(sk->sk_forward_alloc >= 1 << 21)) + __sk_mem_reclaim(sk, 1 << 20); } static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) -- cgit v0.10.2 From 8ab86c00e349cef9fb14719093a7f198bcc72629 Mon Sep 17 00:00:00 2001 From: "phil.turnbull@oracle.com" Date: Thu, 15 Sep 2016 12:41:44 -0400 Subject: irda: Free skb on irda_accept error path. skb is not freed if newsk is NULL. Rework the error path so free_skb is unconditionally called on function exit. Fixes: c3ea9fa27413 ("[IrDA] af_irda: IRDA_ASSERT cleanups") Signed-off-by: Phil Turnbull Signed-off-by: David S. Miller diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 8d2f7c9..ccc2444 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -832,7 +832,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) struct sock *sk = sock->sk; struct irda_sock *new, *self = irda_sk(sk); struct sock *newsk; - struct sk_buff *skb; + struct sk_buff *skb = NULL; int err; err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0); @@ -900,7 +900,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) err = -EPERM; /* value does not seem to make sense. -arnd */ if (!new->tsap) { pr_debug("%s(), dup failed!\n", __func__); - kfree_skb(skb); goto out; } @@ -919,7 +918,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) /* Clean up the original one to keep it in listen state */ irttp_listen(self->tsap); - kfree_skb(skb); sk->sk_ack_backlog--; newsock->state = SS_CONNECTED; @@ -927,6 +925,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) irda_connect_response(new); err = 0; out: + kfree_skb(skb); release_sock(sk); return err; } -- cgit v0.10.2 From 4496195ddd75c4ad57b783739414e69b7d79843e Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 15 Sep 2016 15:02:38 -0300 Subject: sctp: fix SSN comparision This function actually operates on u32 yet its paramteres were declared as u16, causing integer truncation upon calling. Note in patch context that ADDIP_SERIAL_SIGN_BIT is already 32 bits. Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index efc0174..bafe2a0 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -382,7 +382,7 @@ enum { ADDIP_SERIAL_SIGN_BIT = (1<<31) }; -static inline int ADDIP_SERIAL_gte(__u16 s, __u16 t) +static inline int ADDIP_SERIAL_gte(__u32 s, __u32 t) { return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT); } -- cgit v0.10.2 From 2835d2d9e366a2985b24051d228333bfba82f3a7 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 15 Sep 2016 22:47:51 +0200 Subject: bna: add missing per queue ethtool stat Commit ba5ca784 "bna: check for dma mapping errors" added besides other things a statistic that counts number of DMA buffer mapping failures per each Rx queue. This counter is not included in ethtool stats output. Fixes: ba5ca784 "bna: check for dma mapping errors" Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index 0e4fdc3..5671353 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -31,7 +31,7 @@ #define BNAD_NUM_TXF_COUNTERS 12 #define BNAD_NUM_RXF_COUNTERS 10 #define BNAD_NUM_CQ_COUNTERS (3 + 5) -#define BNAD_NUM_RXQ_COUNTERS 6 +#define BNAD_NUM_RXQ_COUNTERS 7 #define BNAD_NUM_TXQ_COUNTERS 5 #define BNAD_ETHTOOL_STATS_NUM \ @@ -658,6 +658,8 @@ bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string) string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_allocbuf_failed", q_num); string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_mapbuf_failed", q_num); + string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_producer_index", q_num); string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_consumer_index", q_num); @@ -678,6 +680,9 @@ bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string) sprintf(string, "rxq%d_allocbuf_failed", q_num); string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_mapbuf_failed", + q_num); + string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_producer_index", q_num); string += ETH_GSTRING_LEN; -- cgit v0.10.2 From 37dd348270c1a48f0234354a06c0ce052b6c85b1 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 15 Sep 2016 22:47:52 +0200 Subject: bna: fix crash in bnad_get_strings() Commit 6e7333d "net: add rx_nohandler stat counter" added the new entry rx_nohandler into struct rtnl_link_stats64. Unfortunately the bna driver foolishly depends on the structure. It uses part of it for ethtool statistics and it's not bad but the driver assumes its size is constant as it defines string for each existing entry. The problem occurs when the structure is extended because you need to modify bna driver as well. If not any attempt to retrieve ethtool statistics results in crash in bnad_get_strings(). The patch changes BNAD_ETHTOOL_STATS_NUM so it counts real number of strings in the array and also removes rtnl_link_stats64 entries that are not used in output and are always zero. Fixes: 6e7333d "net: add rx_nohandler stat counter" Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index 5671353..31f61a7 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -34,12 +34,7 @@ #define BNAD_NUM_RXQ_COUNTERS 7 #define BNAD_NUM_TXQ_COUNTERS 5 -#define BNAD_ETHTOOL_STATS_NUM \ - (sizeof(struct rtnl_link_stats64) / sizeof(u64) + \ - sizeof(struct bnad_drv_stats) / sizeof(u64) + \ - offsetof(struct bfi_enet_stats, rxf_stats[0]) / sizeof(u64)) - -static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { +static const char *bnad_net_stats_strings[] = { "rx_packets", "tx_packets", "rx_bytes", @@ -50,22 +45,10 @@ static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { "tx_dropped", "multicast", "collisions", - "rx_length_errors", - "rx_over_errors", "rx_crc_errors", "rx_frame_errors", - "rx_fifo_errors", - "rx_missed_errors", - - "tx_aborted_errors", - "tx_carrier_errors", "tx_fifo_errors", - "tx_heartbeat_errors", - "tx_window_errors", - - "rx_compressed", - "tx_compressed", "netif_queue_stop", "netif_queue_wakeup", @@ -254,6 +237,8 @@ static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { "fc_tx_fid_parity_errors", }; +#define BNAD_ETHTOOL_STATS_NUM ARRAY_SIZE(bnad_net_stats_strings) + static int bnad_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd) { @@ -859,9 +844,9 @@ bnad_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *buf) { struct bnad *bnad = netdev_priv(netdev); - int i, j, bi; + int i, j, bi = 0; unsigned long flags; - struct rtnl_link_stats64 *net_stats64; + struct rtnl_link_stats64 net_stats64; u64 *stats64; u32 bmap; @@ -876,14 +861,25 @@ bnad_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, * under the same lock */ spin_lock_irqsave(&bnad->bna_lock, flags); - bi = 0; - memset(buf, 0, stats->n_stats * sizeof(u64)); - - net_stats64 = (struct rtnl_link_stats64 *)buf; - bnad_netdev_qstats_fill(bnad, net_stats64); - bnad_netdev_hwstats_fill(bnad, net_stats64); - bi = sizeof(*net_stats64) / sizeof(u64); + memset(&net_stats64, 0, sizeof(net_stats64)); + bnad_netdev_qstats_fill(bnad, &net_stats64); + bnad_netdev_hwstats_fill(bnad, &net_stats64); + + buf[bi++] = net_stats64.rx_packets; + buf[bi++] = net_stats64.tx_packets; + buf[bi++] = net_stats64.rx_bytes; + buf[bi++] = net_stats64.tx_bytes; + buf[bi++] = net_stats64.rx_errors; + buf[bi++] = net_stats64.tx_errors; + buf[bi++] = net_stats64.rx_dropped; + buf[bi++] = net_stats64.tx_dropped; + buf[bi++] = net_stats64.multicast; + buf[bi++] = net_stats64.collisions; + buf[bi++] = net_stats64.rx_length_errors; + buf[bi++] = net_stats64.rx_crc_errors; + buf[bi++] = net_stats64.rx_frame_errors; + buf[bi++] = net_stats64.tx_fifo_errors; /* Get netif_queue_stopped from stack */ bnad->stats.drv_stats.netif_queue_stopped = netif_queue_stopped(netdev); -- cgit v0.10.2 From 19cd120319ef5390404a5d9c829c3a7962f184a8 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 16 Sep 2016 10:50:13 +0200 Subject: stmmac: fix PWRDWN into the PMT register for global unicast. MAC devices use the RWKPKTEN and MGKPKTEN bits of the PMT Control/Status register to generate power management events. So this patch is to properly set the RWKPKTEN [BIT(2)] inside the PMT register (needed in case of global unicast). Reported-by: Aditi SHARMA Signed-off-by: Giuseppe Cavallaro Cc: Alexandre TORGUE Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index cbefe9e..885a5e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -261,7 +261,7 @@ static void dwmac1000_pmt(struct mac_device_info *hw, unsigned long mode) } if (mode & WAKE_UCAST) { pr_debug("GMAC: WOL on global unicast\n"); - pmt |= global_unicast; + pmt |= power_down | global_unicast | wake_up_frame_en; } writel(pmt, ioaddr + GMAC_PMT); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index df5580d..51019b7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -102,7 +102,7 @@ static void dwmac4_pmt(struct mac_device_info *hw, unsigned long mode) } if (mode & WAKE_UCAST) { pr_debug("GMAC: WOL on global unicast\n"); - pmt |= global_unicast; + pmt |= power_down | global_unicast | wake_up_frame_en; } writel(pmt, ioaddr + GMAC_PMT); -- cgit v0.10.2 From b588479358ce26f32138e0f0a7ab0678f8e3e601 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Sun, 18 Sep 2016 07:42:53 +0000 Subject: xfrm: Fix memory leak of aead algorithm name commit 1a6509d99122 ("[IPSEC]: Add support for combined mode algorithms") introduced aead. The function attach_aead kmemdup()s the algorithm name during xfrm_state_construct(). However this memory is never freed. Implementation has since been slightly modified in commit ee5c23176fcc ("xfrm: Clone states properly on migration") without resolving this leak. This patch adds a kfree() call for the aead algorithm name. Fixes: 1a6509d99122 ("[IPSEC]: Add support for combined mode algorithms") Signed-off-by: Ilan Tayari Acked-by: Rami Rosen Signed-off-by: Steffen Klassert diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9895a8c..a30f898d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -332,6 +332,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) { tasklet_hrtimer_cancel(&x->mtimer); del_timer_sync(&x->rtimer); + kfree(x->aead); kfree(x->aalg); kfree(x->ealg); kfree(x->calg); -- cgit v0.10.2 From 4de349e786a3a2d51bd02d56f3de151bbc3c3df9 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 17 Aug 2016 12:41:08 -0300 Subject: can: flexcan: fix resume function On a imx6ul-pico board the following error is seen during system suspend: dpm_run_callback(): platform_pm_resume+0x0/0x54 returns -110 PM: Device 2090000.flexcan failed to resume: error -110 The reason for this suspend error is because when the CAN interface is not active the clocks are disabled and then flexcan_chip_enable() will always fail due to a timeout error. In order to fix this issue, only call flexcan_chip_enable/disable() when the CAN interface is active. Based on a patch from Dong Aisheng in the NXP kernel. Signed-off-by: Fabio Estevam Cc: linux-stable Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 41c0fc9..16f7cad 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1268,11 +1268,10 @@ static int __maybe_unused flexcan_suspend(struct device *device) struct flexcan_priv *priv = netdev_priv(dev); int err; - err = flexcan_chip_disable(priv); - if (err) - return err; - if (netif_running(dev)) { + err = flexcan_chip_disable(priv); + if (err) + return err; netif_stop_queue(dev); netif_device_detach(dev); } @@ -1285,13 +1284,17 @@ static int __maybe_unused flexcan_resume(struct device *device) { struct net_device *dev = dev_get_drvdata(device); struct flexcan_priv *priv = netdev_priv(dev); + int err; priv->can.state = CAN_STATE_ERROR_ACTIVE; if (netif_running(dev)) { netif_device_attach(dev); netif_start_queue(dev); + err = flexcan_chip_enable(priv); + if (err) + return err; } - return flexcan_chip_enable(priv); + return 0; } static SIMPLE_DEV_PM_OPS(flexcan_pm_ops, flexcan_suspend, flexcan_resume); -- cgit v0.10.2 From babd6134a54d70efe875fa5661a20eaecb63f278 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 18 Sep 2016 18:20:27 +0300 Subject: net/mlx5: Fix flow counter bulk command out mailbox allocation The FW command output length should be only the length of struct mlx5_cmd_fc_bulk out field. Failing to do so will cause the memcpy call which is invoked later in the driver to write over wrong memory address and corrupt kernel memory which results in random crashes. This bug was found using the kernel address sanitizer (kasan). Fixes: a351a1b03bf1 ('net/mlx5: Introduce bulk reading of flow counters') Signed-off-by: Roi Dayan Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 9134010..287ade1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -425,11 +425,11 @@ struct mlx5_cmd_fc_bulk * mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num) { struct mlx5_cmd_fc_bulk *b; - int outlen = sizeof(*b) + + int outlen = MLX5_ST_SZ_BYTES(query_flow_counter_out) + MLX5_ST_SZ_BYTES(traffic_counter) * num; - b = kzalloc(outlen, GFP_KERNEL); + b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL); if (!b) return NULL; -- cgit v0.10.2 From 4eea37d7b92076fdeac2a21e5f4dbd92d286719d Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 18 Sep 2016 18:20:28 +0300 Subject: net/mlx5: E-Switch, Fix error flow in the SRIOV e-switch init code When enablement of the SRIOV e-switch in certain mode (switchdev or legacy) fails, we must set the mode to none. Otherwise, we'll run into double free based crashes when further attempting to deal with the e-switch (such as when disabling sriov or unloading the driver). Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8b78f15..b247949 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1554,6 +1554,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) abort: esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + esw->mode = SRIOV_NONE; return err; } -- cgit v0.10.2 From 6c419ba8e2580ab17c164db6e918e163d3537ec1 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 18 Sep 2016 18:20:29 +0300 Subject: net/mlx5: E-Switch, Handle mode change failures E-switch mode changes involve creating HW tables, potentially allocating netdevices, etc, and things can fail. Add an attempt to rollback to the existing mode when changing to the new mode fails. Only if rollback fails, getting proper SRIOV functionality requires module unload or sriov disablement/enablement. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3dc83a9..7de40e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -446,7 +446,7 @@ out: static int esw_offloads_start(struct mlx5_eswitch *esw) { - int err, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; if (esw->mode != SRIOV_LEGACY) { esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n"); @@ -455,8 +455,12 @@ static int esw_offloads_start(struct mlx5_eswitch *esw) mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); - if (err) - esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err); + if (err) { + esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err); + err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + if (err1) + esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err); + } return err; } @@ -508,12 +512,16 @@ create_ft_err: static int esw_offloads_stop(struct mlx5_eswitch *esw) { - int err, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); - if (err) - esw_warn(esw->dev, "Failed set eswitch legacy mode. err %d\n", err); + if (err) { + esw_warn(esw->dev, "Failed setting eswitch to legacy, err %d\n", err); + err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + if (err1) + esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err); + } return err; } -- cgit v0.10.2 From a435a07f9164dda7c0c26e8ad758881f4bafc127 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sun, 18 Sep 2016 17:46:07 +0200 Subject: net: ipv6: fallback to full lookup if table lookup is unsuitable Commit 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups") introduced a regression: insertion of an IPv6 route in a table not containing the appropriate connected route for the gateway but which contained a non-connected route (like a default gateway) fails while it was previously working: $ ip link add eth0 type dummy $ ip link set up dev eth0 $ ip addr add 2001:db8::1/64 dev eth0 $ ip route add ::/0 via 2001:db8::5 dev eth0 table 20 $ ip route add 2001:db8:cafe::1/128 via 2001:db8::6 dev eth0 table 20 RTNETLINK answers: No route to host $ ip -6 route show table 20 default via 2001:db8::5 dev eth0 metric 1024 pref medium After this patch, we get: $ ip route add 2001:db8:cafe::1/128 via 2001:db8::6 dev eth0 table 20 $ ip -6 route show table 20 2001:db8:cafe::1 via 2001:db8::6 dev eth0 metric 1024 pref medium default via 2001:db8::5 dev eth0 metric 1024 pref medium Fixes: 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups") Signed-off-by: Vincent Bernat Acked-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4981755..e3a224b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1986,9 +1986,18 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) if (!(gwa_type & IPV6_ADDR_UNICAST)) goto out; - if (cfg->fc_table) + if (cfg->fc_table) { grt = ip6_nh_lookup_table(net, cfg, gw_addr); + if (grt) { + if (grt->rt6i_flags & RTF_GATEWAY || + (dev && dev != grt->dst.dev)) { + ip6_rt_put(grt); + grt = NULL; + } + } + } + if (!grt) grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); -- cgit v0.10.2 From 3ed6e498b91a4dc5d0e8b6270a6c144061db2455 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 18 Sep 2016 21:17:19 +0200 Subject: MAINTAINERS: Add an entry for the core network DSA code The core distributed switch architecture code currently does not have a MAINTAINERS entry, which results in some contributions not landing in the right peoples inbox. Signed-off-by: Andrew Lunn Acked-by: Florian Fainelli Acked-by: Vivien Didelot Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index a5e1270..247b418 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8160,6 +8160,15 @@ S: Maintained W: https://fedorahosted.org/dropwatch/ F: net/core/drop_monitor.c +NETWORKING [DSA] +M: Andrew Lunn +M: Vivien Didelot +M: Florian Fainelli +S: Maintained +F: net/dsa/ +F: include/net/dsa.h +F: drivers/net/dsa/ + NETWORKING [GENERAL] M: "David S. Miller" L: netdev@vger.kernel.org -- cgit v0.10.2 From 67a99b7061c07b190ac6c39f136afedbb7aa86e9 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 19 Sep 2016 17:47:41 +0300 Subject: qed: Fix stack corruption on probe Commit fe56b9e6a8d95 ("qed: Add module with basic common support") has introduced a stack corruption during probe, where filling a local struct with data to be sent to management firmware is incorrectly filled; The data is written outside of the struct and corrupts the stack. Changes from v1: ---------------- - Correct the value written [Caught by David Laight] Fixes: fe56b9e6a8d95 ("qed: Add module with basic common support") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index a240f26..f776a77 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1153,8 +1153,8 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, p_drv_version = &union_data.drv_version; p_drv_version->version = p_ver->version; - for (i = 0; i < MCP_DRV_VER_STR_SIZE - 1; i += 4) { - val = cpu_to_be32(p_ver->name[i]); + for (i = 0; i < (MCP_DRV_VER_STR_SIZE - 4) / sizeof(u32); i++) { + val = cpu_to_be32(*((u32 *)&p_ver->name[i * sizeof(u32)])); *(__be32 *)&p_drv_version->name[i * sizeof(u32)] = val; } -- cgit v0.10.2 From 9b86a8d19bd6406a10de5f924bf2a003a502d427 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 20 Sep 2016 12:00:52 +0530 Subject: cxgb4/cxgb4vf: Allocate more queues for 25G and 100G adapter We were missing check for 25G and 100G while checking port speed, which lead to less number of queues getting allocated for 25G & 100G adapters and leading to low throughput. Adding the missing check for both NIC and vNIC driver. Also fixes port advertisement for 25G and 100G in ethtool output. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 2e2aa9f..edd2338 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -419,8 +419,8 @@ struct link_config { unsigned short supported; /* link capabilities */ unsigned short advertising; /* advertised capabilities */ unsigned short lp_advertising; /* peer advertised capabilities */ - unsigned short requested_speed; /* speed user has requested */ - unsigned short speed; /* actual link speed */ + unsigned int requested_speed; /* speed user has requested */ + unsigned int speed; /* actual link speed */ unsigned char requested_fc; /* flow control user has requested */ unsigned char fc; /* actual link flow control */ unsigned char autoneg; /* autonegotiating? */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index c762a8c..3ceafb55 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4305,10 +4305,17 @@ static const struct pci_error_handlers cxgb4_eeh = { .resume = eeh_resume, }; +/* Return true if the Link Configuration supports "High Speeds" (those greater + * than 1Gb/s). + */ static inline bool is_x_10g_port(const struct link_config *lc) { - return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 || - (lc->supported & FW_PORT_CAP_SPEED_40G) != 0; + unsigned int speeds, high_speeds; + + speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported)); + high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G); + + return high_speeds != 0; } static inline void init_rspq(struct adapter *adap, struct sge_rspq *q, @@ -4756,8 +4763,12 @@ static void print_port_info(const struct net_device *dev) bufp += sprintf(bufp, "1000/"); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) bufp += sprintf(bufp, "10G/"); + if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G) + bufp += sprintf(bufp, "25G/"); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G) bufp += sprintf(bufp, "40G/"); + if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G) + bufp += sprintf(bufp, "100G/"); if (bufp != buf) --bufp; sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type)); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index dc92c80..660204b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3627,7 +3627,8 @@ void t4_ulprx_read_la(struct adapter *adap, u32 *la_buf) } #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\ - FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \ + FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \ + FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \ FW_PORT_CAP_ANEG) /** @@ -7196,8 +7197,12 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) speed = 1000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G)) speed = 10000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G)) + speed = 25000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G)) speed = 40000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G)) + speed = 100000; lc = &pi->link_cfg; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index a89b307..30507d4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -2265,6 +2265,12 @@ enum fw_port_cap { FW_PORT_CAP_802_3_ASM_DIR = 0x8000, }; +#define FW_PORT_CAP_SPEED_S 0 +#define FW_PORT_CAP_SPEED_M 0x3f +#define FW_PORT_CAP_SPEED_V(x) ((x) << FW_PORT_CAP_SPEED_S) +#define FW_PORT_CAP_SPEED_G(x) \ + (((x) >> FW_PORT_CAP_SPEED_S) & FW_PORT_CAP_SPEED_M) + enum fw_port_mdi { FW_PORT_CAP_MDI_UNCHANGED, FW_PORT_CAP_MDI_AUTO, diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index 8ee5414..17a2bbc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -108,8 +108,8 @@ struct link_config { unsigned int supported; /* link capabilities */ unsigned int advertising; /* advertised capabilities */ unsigned short lp_advertising; /* peer advertised capabilities */ - unsigned short requested_speed; /* speed user has requested */ - unsigned short speed; /* actual link speed */ + unsigned int requested_speed; /* speed user has requested */ + unsigned int speed; /* actual link speed */ unsigned char requested_fc; /* flow control user has requested */ unsigned char fc; /* actual link flow control */ unsigned char autoneg; /* autonegotiating? */ @@ -271,10 +271,17 @@ static inline bool is_10g_port(const struct link_config *lc) return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0; } +/* Return true if the Link Configuration supports "High Speeds" (those greater + * than 1Gb/s). + */ static inline bool is_x_10g_port(const struct link_config *lc) { - return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 || - (lc->supported & FW_PORT_CAP_SPEED_40G) != 0; + unsigned int speeds, high_speeds; + + speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported)); + high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G); + + return high_speeds != 0; } static inline unsigned int core_ticks_per_usec(const struct adapter *adapter) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index 427bfa7..b5622b1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -314,8 +314,9 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size, } #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\ - FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \ - FW_PORT_CAP_SPEED_100G | FW_PORT_CAP_ANEG) + FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \ + FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \ + FW_PORT_CAP_ANEG) /** * init_link_config - initialize a link's SW state @@ -1712,8 +1713,12 @@ int t4vf_handle_fw_rpl(struct adapter *adapter, const __be64 *rpl) speed = 1000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G)) speed = 10000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G)) + speed = 25000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G)) speed = 40000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G)) + speed = 100000; /* * Scan all of our "ports" (Virtual Interfaces) looking for -- cgit v0.10.2 From e6449539828ac3b7c74b648793291640bcca8259 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:05 +0800 Subject: r8152: move some functions Move the following functions forward. r8152_mmd_indirect() r8152_mmd_read() r8152_mmd_write() r8152_eee_en() r8152b_enable_eee() r8153_eee_en() r8153_enable_eee() r8152b_enable_fc() r8153_aldps_en() Signed-off-by: Hayes Wang Signed-off-by: David S. Miller diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f41a8ad..ae7db46 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2552,6 +2552,77 @@ static void r8152_aldps_en(struct r8152 *tp, bool enable) } } +static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg) +{ + ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev); + ocp_reg_write(tp, OCP_EEE_DATA, reg); + ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev); +} + +static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg) +{ + u16 data; + + r8152_mmd_indirect(tp, dev, reg); + data = ocp_reg_read(tp, OCP_EEE_DATA); + ocp_reg_write(tp, OCP_EEE_AR, 0x0000); + + return data; +} + +static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data) +{ + r8152_mmd_indirect(tp, dev, reg); + ocp_reg_write(tp, OCP_EEE_DATA, data); + ocp_reg_write(tp, OCP_EEE_AR, 0x0000); +} + +static void r8152_eee_en(struct r8152 *tp, bool enable) +{ + u16 config1, config2, config3; + u32 ocp_data; + + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); + config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask; + config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2); + config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask; + + if (enable) { + ocp_data |= EEE_RX_EN | EEE_TX_EN; + config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN; + config1 |= sd_rise_time(1); + config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN; + config3 |= fast_snr(42); + } else { + ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); + config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | + RX_QUIET_EN); + config1 |= sd_rise_time(7); + config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN); + config3 |= fast_snr(511); + } + + ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); + ocp_reg_write(tp, OCP_EEE_CONFIG1, config1); + ocp_reg_write(tp, OCP_EEE_CONFIG2, config2); + ocp_reg_write(tp, OCP_EEE_CONFIG3, config3); +} + +static void r8152b_enable_eee(struct r8152 *tp) +{ + r8152_eee_en(tp, true); + r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX); +} + +static void r8152b_enable_fc(struct r8152 *tp) +{ + u16 anar; + + anar = r8152_mdio_read(tp, MII_ADVERTISE); + anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; + r8152_mdio_write(tp, MII_ADVERTISE, anar); +} + static void rtl8152_disable(struct r8152 *tp) { r8152_aldps_en(tp, false); @@ -2701,6 +2772,47 @@ static void r8152b_enter_oob(struct r8152 *tp) ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); } +static void r8153_aldps_en(struct r8152 *tp, bool enable) +{ + u16 data; + + data = ocp_reg_read(tp, OCP_POWER_CFG); + if (enable) { + data |= EN_ALDPS; + ocp_reg_write(tp, OCP_POWER_CFG, data); + } else { + data &= ~EN_ALDPS; + ocp_reg_write(tp, OCP_POWER_CFG, data); + msleep(20); + } +} + +static void r8153_eee_en(struct r8152 *tp, bool enable) +{ + u32 ocp_data; + u16 config; + + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); + config = ocp_reg_read(tp, OCP_EEE_CFG); + + if (enable) { + ocp_data |= EEE_RX_EN | EEE_TX_EN; + config |= EEE10_EN; + } else { + ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); + config &= ~EEE10_EN; + } + + ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); + ocp_reg_write(tp, OCP_EEE_CFG, config); +} + +static void r8153_enable_eee(struct r8152 *tp) +{ + r8153_eee_en(tp, true); + ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); +} + static void r8153_hw_phy_cfg(struct r8152 *tp) { u32 ocp_data; @@ -2866,21 +2978,6 @@ static void r8153_enter_oob(struct r8152 *tp) ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); } -static void r8153_aldps_en(struct r8152 *tp, bool enable) -{ - u16 data; - - data = ocp_reg_read(tp, OCP_POWER_CFG); - if (enable) { - data |= EN_ALDPS; - ocp_reg_write(tp, OCP_POWER_CFG, data); - } else { - data &= ~EN_ALDPS; - ocp_reg_write(tp, OCP_POWER_CFG, data); - msleep(20); - } -} - static void rtl8153_disable(struct r8152 *tp) { r8153_aldps_en(tp, false); @@ -3246,103 +3343,6 @@ static int rtl8152_close(struct net_device *netdev) return res; } -static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg) -{ - ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev); - ocp_reg_write(tp, OCP_EEE_DATA, reg); - ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev); -} - -static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg) -{ - u16 data; - - r8152_mmd_indirect(tp, dev, reg); - data = ocp_reg_read(tp, OCP_EEE_DATA); - ocp_reg_write(tp, OCP_EEE_AR, 0x0000); - - return data; -} - -static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data) -{ - r8152_mmd_indirect(tp, dev, reg); - ocp_reg_write(tp, OCP_EEE_DATA, data); - ocp_reg_write(tp, OCP_EEE_AR, 0x0000); -} - -static void r8152_eee_en(struct r8152 *tp, bool enable) -{ - u16 config1, config2, config3; - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); - config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask; - config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2); - config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask; - - if (enable) { - ocp_data |= EEE_RX_EN | EEE_TX_EN; - config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN; - config1 |= sd_rise_time(1); - config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN; - config3 |= fast_snr(42); - } else { - ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); - config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | - RX_QUIET_EN); - config1 |= sd_rise_time(7); - config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN); - config3 |= fast_snr(511); - } - - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); - ocp_reg_write(tp, OCP_EEE_CONFIG1, config1); - ocp_reg_write(tp, OCP_EEE_CONFIG2, config2); - ocp_reg_write(tp, OCP_EEE_CONFIG3, config3); -} - -static void r8152b_enable_eee(struct r8152 *tp) -{ - r8152_eee_en(tp, true); - r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX); -} - -static void r8153_eee_en(struct r8152 *tp, bool enable) -{ - u32 ocp_data; - u16 config; - - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); - config = ocp_reg_read(tp, OCP_EEE_CFG); - - if (enable) { - ocp_data |= EEE_RX_EN | EEE_TX_EN; - config |= EEE10_EN; - } else { - ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); - config &= ~EEE10_EN; - } - - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); - ocp_reg_write(tp, OCP_EEE_CFG, config); -} - -static void r8153_enable_eee(struct r8152 *tp) -{ - r8153_eee_en(tp, true); - ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); -} - -static void r8152b_enable_fc(struct r8152 *tp) -{ - u16 anar; - - anar = r8152_mdio_read(tp, MII_ADVERTISE); - anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; - r8152_mdio_write(tp, MII_ADVERTISE, anar); -} - static void rtl_tally_reset(struct r8152 *tp) { u32 ocp_data; -- cgit v0.10.2 From 2dd436daac7848dbf3fe799cf59c1408871a14e3 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:06 +0800 Subject: r8152: move enabling PHY Move enabling PHY to init(), otherwise some other settings may fail. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index ae7db46..dbf11ba 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2632,14 +2632,6 @@ static void rtl8152_disable(struct r8152 *tp) static void r8152b_hw_phy_cfg(struct r8152 *tp) { - u16 data; - - data = r8152_mdio_read(tp, MII_BMCR); - if (data & BMCR_PDOWN) { - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); - } - set_bit(PHY_RESET, &tp->flags); } @@ -2818,16 +2810,6 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) u32 ocp_data; u16 data; - if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 || - tp->version == RTL_VER_05) - ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L); - - data = r8152_mdio_read(tp, MII_BMCR); - if (data & BMCR_PDOWN) { - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); - } - if (tp->version == RTL_VER_03) { data = ocp_reg_read(tp, OCP_EEE_CFG); data &= ~CTAP_SHORT_EN; @@ -3355,10 +3337,17 @@ static void rtl_tally_reset(struct r8152 *tp) static void r8152b_init(struct r8152 *tp) { u32 ocp_data; + u16 data; if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; + data = r8152_mdio_read(tp, MII_BMCR); + if (data & BMCR_PDOWN) { + data &= ~BMCR_PDOWN; + r8152_mdio_write(tp, MII_BMCR, data); + } + r8152_aldps_en(tp, false); if (tp->version == RTL_VER_01) { @@ -3394,6 +3383,7 @@ static void r8152b_init(struct r8152 *tp) static void r8153_init(struct r8152 *tp) { u32 ocp_data; + u16 data; int i; if (test_bit(RTL8152_UNPLUG, &tp->flags)) @@ -3416,6 +3406,23 @@ static void r8153_init(struct r8152 *tp) msleep(20); } + if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 || + tp->version == RTL_VER_05) + ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L); + + data = r8152_mdio_read(tp, MII_BMCR); + if (data & BMCR_PDOWN) { + data &= ~BMCR_PDOWN; + r8152_mdio_write(tp, MII_BMCR, data); + } + + for (i = 0; i < 500; i++) { + ocp_data = ocp_reg_read(tp, OCP_PHY_STATUS) & PHY_STAT_MASK; + if (ocp_data == PHY_STAT_LAN_ON) + break; + msleep(20); + } + usb_disable_lpm(tp->udev); r8153_u2p3en(tp, false); -- cgit v0.10.2 From ef39df8eaba48c0de779440f41a648b17a560953 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:07 +0800 Subject: r8152: move PHY settings to hw_phy_cfg Move the PHY relative settings together to hw_phy_cfg(). Signed-off-by: Hayes Wang Signed-off-by: David S. Miller diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index dbf11ba..9ce5bd5 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2632,6 +2632,10 @@ static void rtl8152_disable(struct r8152 *tp) static void r8152b_hw_phy_cfg(struct r8152 *tp) { + r8152b_enable_eee(tp); + r8152_aldps_en(tp, true); + r8152b_enable_fc(tp); + set_bit(PHY_RESET, &tp->flags); } @@ -2839,6 +2843,10 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) sram_write(tp, SRAM_10M_AMP1, 0x00af); sram_write(tp, SRAM_10M_AMP2, 0x0208); + r8153_enable_eee(tp); + r8153_aldps_en(tp, true); + r8152b_enable_fc(tp); + set_bit(PHY_RESET, &tp->flags); } @@ -3369,9 +3377,6 @@ static void r8152b_init(struct r8152 *tp) SPDWN_RXDV_MSK | SPDWN_LINKCHG_MSK; ocp_write_word(tp, MCU_TYPE_PLA, PLA_GPHY_INTR_IMR, ocp_data); - r8152b_enable_eee(tp); - r8152_aldps_en(tp, true); - r8152b_enable_fc(tp); rtl_tally_reset(tp); /* enable rx aggregation */ @@ -3490,9 +3495,6 @@ static void r8153_init(struct r8152 *tp) ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0); ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0); - r8153_enable_eee(tp); - r8153_aldps_en(tp, true); - r8152b_enable_fc(tp); rtl_tally_reset(tp); r8153_u2p3en(tp, true); } -- cgit v0.10.2 From af0287ec10c62c84cc5cd1bad4fd37644a1ac41d Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:08 +0800 Subject: r8152: remove r8153_enable_eee Remove r8153_enable_eee(). Signed-off-by: Hayes Wang Signed-off-by: David S. Miller diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 9ce5bd5..e7a05dd 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2803,12 +2803,6 @@ static void r8153_eee_en(struct r8152 *tp, bool enable) ocp_reg_write(tp, OCP_EEE_CFG, config); } -static void r8153_enable_eee(struct r8152 *tp) -{ - r8153_eee_en(tp, true); - ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); -} - static void r8153_hw_phy_cfg(struct r8152 *tp) { u32 ocp_data; @@ -2843,7 +2837,9 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) sram_write(tp, SRAM_10M_AMP1, 0x00af); sram_write(tp, SRAM_10M_AMP2, 0x0208); - r8153_enable_eee(tp); + r8153_eee_en(tp, true); + ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); + r8153_aldps_en(tp, true); r8152b_enable_fc(tp); -- cgit v0.10.2 From d768c61bc353a0e0de3f839e1de99eee7d4eca10 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:09 +0800 Subject: r8152: disable ALDPS and EEE before setting PHY Disable ALDPS and EEE to avoid the possible failure when setting the PHY. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index e7a05dd..c254248 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "5" +#define NET_VERSION "6" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -2808,6 +2808,13 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) u32 ocp_data; u16 data; + /* disable ALDPS before updating the PHY parameters */ + r8153_aldps_en(tp, false); + + /* disable EEE before updating the PHY parameters */ + r8153_eee_en(tp, false); + ocp_reg_write(tp, OCP_EEE_ADV, 0); + if (tp->version == RTL_VER_03) { data = ocp_reg_read(tp, OCP_EEE_CFG); data &= ~CTAP_SHORT_EN; @@ -3390,7 +3397,6 @@ static void r8153_init(struct r8152 *tp) if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; - r8153_aldps_en(tp, false); r8153_u1u2en(tp, false); for (i = 0; i < 500; i++) { -- cgit v0.10.2 From b5036cd4ed3173ab8cdbc85e2ba74acf46bafb51 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 20 Sep 2016 16:17:22 +0200 Subject: ipmr, ip6mr: return lastuse relative to now When I introduced the lastuse member I made a subtle error because it was returned as an absolute value but that is meaningless to user-space as it doesn't allow to see how old exactly an entry is. Let's make it similar to how the bridge returns such values and make it relative to "now" (jiffies). This allows us to show the actual age of the entries and is much more useful (e.g. user-space daemons can age out entries, iproute2 can display the lastuse properly). Fixes: 43b9e1274060 ("net: ipmr/ip6mr: add support for keeping an entry age") Reported-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2625332..a87bcd2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2076,6 +2076,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct rta_mfc_stats mfcs; struct nlattr *mp_attr; struct rtnexthop *nhp; + unsigned long lastuse; int ct; /* If cache is unresolved, don't try to parse IIF and OIF */ @@ -2105,12 +2106,14 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, nla_nest_end(skb, mp_attr); + lastuse = READ_ONCE(c->mfc_un.res.lastuse); + lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; + mfcs.mfcs_packets = c->mfc_un.res.pkt; mfcs.mfcs_bytes = c->mfc_un.res.bytes; mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || - nla_put_u64_64bit(skb, RTA_EXPIRES, - jiffies_to_clock_t(c->mfc_un.res.lastuse), + nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), RTA_PAD)) return -EMSGSIZE; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 6122f9c..fccb5dd 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2239,6 +2239,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, struct rta_mfc_stats mfcs; struct nlattr *mp_attr; struct rtnexthop *nhp; + unsigned long lastuse; int ct; /* If cache is unresolved, don't try to parse IIF and OIF */ @@ -2269,12 +2270,14 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, nla_nest_end(skb, mp_attr); + lastuse = READ_ONCE(c->mfc_un.res.lastuse); + lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; + mfcs.mfcs_packets = c->mfc_un.res.pkt; mfcs.mfcs_bytes = c->mfc_un.res.bytes; mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || - nla_put_u64_64bit(skb, RTA_EXPIRES, - jiffies_to_clock_t(c->mfc_un.res.lastuse), + nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), RTA_PAD)) return -EMSGSIZE; -- cgit v0.10.2 From 63c43787d35e45562a6b5927e2edc8f4783d95b8 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 19 Sep 2016 16:17:57 +0200 Subject: vti6: fix input path Since commit 1625f4529957, vti6 is broken, all input packets are dropped (LINUX_MIB_XFRMINNOSTATES is incremented). XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 is set by vti6_rcv() before calling xfrm6_rcv()/xfrm6_rcv_spi(), thus we cannot set to NULL that value in xfrm6_rcv_spi(). A new function xfrm6_rcv_tnl() that enables to pass a value to xfrm6_rcv_spi() is added, so that xfrm6_rcv() is not touched (this function is used in several handlers). CC: Alexey Kodanev Fixes: 1625f4529957 ("net/xfrm_input: fix possible NULL deref of tunnel.ip6->parms.i_key") Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert diff --git a/include/net/xfrm.h b/include/net/xfrm.h index adfebd6..1793431 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1540,8 +1540,10 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); void xfrm4_local_error(struct sk_buff *skb, u32 mtu); int xfrm6_extract_header(struct sk_buff *skb); int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); -int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi); +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, + struct ip6_tnl *t); int xfrm6_transport_finish(struct sk_buff *skb, int async); +int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t); int xfrm6_rcv(struct sk_buff *skb); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 52a2f73..5bd3afd 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -321,11 +321,9 @@ static int vti6_rcv(struct sk_buff *skb) goto discard; } - XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; - rcu_read_unlock(); - return xfrm6_rcv(skb); + return xfrm6_rcv_tnl(skb, t); } rcu_read_unlock(); return -EINVAL; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 00a2d40..b578956 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -21,9 +21,10 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb) return xfrm6_extract_header(skb); } -int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, + struct ip6_tnl *t) { - XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; XFRM_SPI_SKB_CB(skb)->family = AF_INET6; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); return xfrm_input(skb, nexthdr, spi, 0); @@ -49,13 +50,18 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) return -1; } -int xfrm6_rcv(struct sk_buff *skb) +int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t) { return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], - 0); + 0, t); } -EXPORT_SYMBOL(xfrm6_rcv); +EXPORT_SYMBOL(xfrm6_rcv_tnl); +int xfrm6_rcv(struct sk_buff *skb) +{ + return xfrm6_rcv_tnl(skb, NULL); +} +EXPORT_SYMBOL(xfrm6_rcv); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto) { diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 5743044..e1c0bbe 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -236,7 +236,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb) __be32 spi; spi = xfrm6_tunnel_spi_lookup(net, (const xfrm_address_t *)&iph->saddr); - return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi); + return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL); } static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, -- cgit v0.10.2 From 8d58790b832e13d6006d842037732304af357c3c Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 19 Sep 2016 21:34:01 +0200 Subject: net: can: ifi: Configure transmitter delay Configure the transmitter delay register at +0x1c to correctly handle the CAN FD bitrate switch (BRS). This moves the SSP (secondary sample point) to a proper offset, so that the TDC mechanism works and won't generate error frames on the CAN link. Signed-off-by: Marek Vasut Cc: Marc Kleine-Budde Cc: Mark Rutland Cc: Oliver Hartkopp Cc: Wolfgang Grandegger Cc: linux-stable Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index 2d1d22e..368bb07 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -81,6 +81,10 @@ #define IFI_CANFD_TIME_SET_TIMEA_4_12_6_6 BIT(15) #define IFI_CANFD_TDELAY 0x1c +#define IFI_CANFD_TDELAY_DEFAULT 0xb +#define IFI_CANFD_TDELAY_MASK 0x3fff +#define IFI_CANFD_TDELAY_ABS BIT(14) +#define IFI_CANFD_TDELAY_EN BIT(15) #define IFI_CANFD_ERROR 0x20 #define IFI_CANFD_ERROR_TX_OFFSET 0 @@ -641,7 +645,7 @@ static void ifi_canfd_set_bittiming(struct net_device *ndev) struct ifi_canfd_priv *priv = netdev_priv(ndev); const struct can_bittiming *bt = &priv->can.bittiming; const struct can_bittiming *dbt = &priv->can.data_bittiming; - u16 brp, sjw, tseg1, tseg2; + u16 brp, sjw, tseg1, tseg2, tdc; /* Configure bit timing */ brp = bt->brp - 2; @@ -664,6 +668,11 @@ static void ifi_canfd_set_bittiming(struct net_device *ndev) (brp << IFI_CANFD_TIME_PRESCALE_OFF) | (sjw << IFI_CANFD_TIME_SJW_OFF_7_9_8_8), priv->base + IFI_CANFD_FTIME); + + /* Configure transmitter delay */ + tdc = (dbt->brp * (dbt->phase_seg1 + 1)) & IFI_CANFD_TDELAY_MASK; + writel(IFI_CANFD_TDELAY_EN | IFI_CANFD_TDELAY_ABS | tdc, + priv->base + IFI_CANFD_TDELAY); } static void ifi_canfd_set_filter(struct net_device *ndev, const u32 id, -- cgit v0.10.2 From fba1296624bf95fc07057da1e26beee8a733180c Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 20 Sep 2016 14:55:31 +0300 Subject: net/mlx4_core: Fix to clean devlink resources This patch cleans devlink resources by calling devlink_port_unregister() to avoid the following issues: - Kernel panic when triggering reset flow. - Memory leak due to unfreed resources in mlx4_init_port_info(). Fixes: 09d4d087cd48 ("mlx4: Implement devlink interface") Signed-off-by: Kamal Heib Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 75dd2e3..7183ac4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2970,6 +2970,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) mlx4_err(dev, "Failed to create mtu file for port %d\n", port); device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); + devlink_port_unregister(&info->devlink_port); info->port = -1; } @@ -2984,6 +2985,8 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info) device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); device_remove_file(&info->dev->persist->pdev->dev, &info->port_mtu_attr); + devlink_port_unregister(&info->devlink_port); + #ifdef CONFIG_RFS_ACCEL free_irq_cpu_rmap(info->rmap); info->rmap = NULL; -- cgit v0.10.2 From adb03115f4590baa280ddc440a8eff08a6be0cb7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Sep 2016 18:06:17 -0700 Subject: net: get rid of an signed integer overflow in ip_idents_reserve() Jiri Pirko reported an UBSAN warning happening in ip_idents_reserve() [] UBSAN: Undefined behaviour in ./arch/x86/include/asm/atomic.h:156:11 [] signed integer overflow: [] -2117905507 + -695755206 cannot be represented in type 'int' Since we do not have uatomic_add_return() yet, use atomic_cmpxchg() so that the arithmetics can be done using unsigned int. Fixes: 04ca6973f7c1 ("ip: make IP identifiers less predictable") Signed-off-by: Eric Dumazet Reported-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a1f2830..b5b47a2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -476,12 +476,18 @@ u32 ip_idents_reserve(u32 hash, int segs) atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; u32 old = ACCESS_ONCE(*p_tstamp); u32 now = (u32)jiffies; - u32 delta = 0; + u32 new, delta = 0; if (old != now && cmpxchg(p_tstamp, old, now) == old) delta = prandom_u32_max(now - old); - return atomic_add_return(segs + delta, p_id) - segs; + /* Do not use atomic_add_return() as it makes UBSAN unhappy */ + do { + old = (u32)atomic_read(p_id); + new = old + delta + segs; + } while (atomic_cmpxchg(p_id, old, new) != old); + + return new - segs; } EXPORT_SYMBOL(ip_idents_reserve); -- cgit v0.10.2 From 75c9510b8f745f75280029a8a9f96567f55f401e Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 20 Sep 2016 23:33:15 -0400 Subject: MAINTAINERS: Update b44 maintainer. Taking over as maintainer since Gary Zambrano is no longer working for Broadcom. Signed-off-by: Michael Chan Acked-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index 247b418..3df4be3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2500,7 +2500,7 @@ S: Supported F: kernel/bpf/ BROADCOM B44 10/100 ETHERNET DRIVER -M: Gary Zambrano +M: Michael Chan L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/b44.* -- cgit v0.10.2 From de1d657816c6fbb70f07b01d50ec669dff0d4e60 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 21 Sep 2016 16:16:14 -0700 Subject: tcp: fix under-accounting retransmit SNMP counters This patch fixes these under-accounting SNMP rtx stats LINUX_MIB_TCPFORWARDRETRANS LINUX_MIB_TCPFASTRETRANS LINUX_MIB_TCPSLOWSTARTRETRANS when retransmitting TSO packets Fixes: 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time") Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f53d0cc..e15ec82 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2831,7 +2831,7 @@ begin_fwd: if (tcp_retransmit_skb(sk, skb, segs)) return; - NET_INC_STATS(sock_net(sk), mib_idx); + NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb)); if (tcp_in_cwnd_reduction(sk)) tp->prr_out += tcp_skb_pcount(skb); -- cgit v0.10.2 From 7e32b44361abc77fbc01f2b97b045c405b2583e5 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 21 Sep 2016 16:16:15 -0700 Subject: tcp: properly account Fast Open SYN-ACK retrans Since the TFO socket is accepted right off SYN-data, the socket owner can call getsockopt(TCP_INFO) to collect ongoing SYN-ACK retransmission or timeout stats (i.e., tcpi_total_retrans, tcpi_retransmits). Currently those stats are only updated upon handshake completes. This patch fixes it. Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3ebf45b..08323bd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5885,7 +5885,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) * so release it. */ if (req) { - tp->total_retrans = req->num_retrans; + inet_csk(sk)->icsk_retransmits = 0; reqsk_fastopen_remove(sk, req, false); } else { /* Make sure socket is routed, for correct metrics. */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e15ec82..5288cec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3568,6 +3568,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) if (!res) { __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + if (unlikely(tcp_passive_fastopen(sk))) + tcp_sk(sk)->total_retrans++; } return res; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index d84930b..f712b41 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -384,6 +384,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk) */ inet_rtx_syn_ack(sk, req); req->num_timeout++; + icsk->icsk_retransmits++; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); } -- cgit v0.10.2