From 133800d1f0288b9ddfc0d0aded10d9efa82d5b8c Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 8 Mar 2016 10:34:28 -0300 Subject: sctp: fix copying more bytes than expected in sctp_add_bind_addr Dmitry reported that sctp_add_bind_addr may read more bytes than expected in case the parameter is a IPv4 addr supplied by the user through calls such as sctp_bindx_add(), because it always copies sizeof(union sctp_addr) while the buffer may be just a struct sockaddr_in, which is smaller. This patch then fixes it by limiting the memcpy to the min between the union size and a (new parameter) provided addr size. Where possible this parameter still is the size of that union, except for reading from user-provided buffers, which then it accounts for protocol type. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 205630b..f816344 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1098,7 +1098,7 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, gfp_t gfp); int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *, - __u8 addr_state, gfp_t gfp); + int new_size, __u8 addr_state, gfp_t gfp); int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *); int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *, struct sctp_sock *); diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 871cdf9..401c607 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -111,7 +111,8 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest, dest->port = src->port; list_for_each_entry(addr, &src->address_list, list) { - error = sctp_add_bind_addr(dest, &addr->a, 1, gfp); + error = sctp_add_bind_addr(dest, &addr->a, sizeof(addr->a), + 1, gfp); if (error < 0) break; } @@ -150,7 +151,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp) /* Add an address to the bind address list in the SCTP_bind_addr structure. */ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, - __u8 addr_state, gfp_t gfp) + int new_size, __u8 addr_state, gfp_t gfp) { struct sctp_sockaddr_entry *addr; @@ -159,7 +160,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, if (!addr) return -ENOMEM; - memcpy(&addr->a, new, sizeof(*new)); + memcpy(&addr->a, new, min_t(size_t, sizeof(*new), new_size)); /* Fix up the port if it has not yet been set. * Both v4 and v6 have the port at the same offset. @@ -291,7 +292,8 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, } af->from_addr_param(&addr, rawaddr, htons(port), 0); - retval = sctp_add_bind_addr(bp, &addr, SCTP_ADDR_SRC, gfp); + retval = sctp_add_bind_addr(bp, &addr, sizeof(addr), + SCTP_ADDR_SRC, gfp); if (retval) { /* Can't finish building the list, clean up. */ sctp_bind_addr_clean(bp); @@ -453,8 +455,8 @@ static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest, (((AF_INET6 == addr->sa.sa_family) && (flags & SCTP_ADDR6_ALLOWED) && (flags & SCTP_ADDR6_PEERSUPP)))) - error = sctp_add_bind_addr(dest, addr, SCTP_ADDR_SRC, - gfp); + error = sctp_add_bind_addr(dest, addr, sizeof(*addr), + SCTP_ADDR_SRC, gfp); } return error; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1099e99..d3d50da 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -216,6 +216,7 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, (copy_flags & SCTP_ADDR6_ALLOWED) && (copy_flags & SCTP_ADDR6_PEERSUPP)))) { error = sctp_add_bind_addr(bp, &addr->a, + sizeof(addr->a), SCTP_ADDR_SRC, GFP_ATOMIC); if (error) goto end_copy; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 5d6a03f..7fe971e 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1830,7 +1830,8 @@ no_hmac: /* Also, add the destination address. */ if (list_empty(&retval->base.bind_addr.address_list)) { sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, - SCTP_ADDR_SRC, GFP_ATOMIC); + sizeof(chunk->dest), SCTP_ADDR_SRC, + GFP_ATOMIC); } retval->next_tsn = retval->c.initial_tsn; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e878da0..0e3de0c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -386,7 +386,8 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) /* Add the address to the bind address list. * Use GFP_ATOMIC since BHs will be disabled. */ - ret = sctp_add_bind_addr(bp, addr, SCTP_ADDR_SRC, GFP_ATOMIC); + ret = sctp_add_bind_addr(bp, addr, af->sockaddr_len, + SCTP_ADDR_SRC, GFP_ATOMIC); /* Copy back into socket for getsockname() use. */ if (!ret) { @@ -577,6 +578,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, af = sctp_get_af_specific(addr->v4.sin_family); memcpy(&saveaddr, addr, af->sockaddr_len); retval = sctp_add_bind_addr(bp, &saveaddr, + sizeof(saveaddr), SCTP_ADDR_NEW, GFP_ATOMIC); addr_buf += af->sockaddr_len; } -- cgit v0.10.2 From eaa93bf4c6090809395605d1775a0db9970eda5e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 18 Mar 2016 18:37:57 +0100 Subject: vxlan: fix populating tclass in vxlan6_get_route Jiri mentioned that flowi6_tos of struct flowi6 is never used/read anywhere. In fact, rest of the kernel uses the flowi6's flowlabel, where the traffic class _and_ the flowlabel (aka flowinfo) is encoded. For example, for policy routing, fib6_rule_match() uses ip6_tclass() that is applied on the flowlabel member for matching on tclass. Similar fix is needed for geneve, where flowi6_tos is set as well. Installing a v6 blackhole rule that f.e. matches on tos is now working with vxlan. Fixes: 1400615d64cf ("vxlan: allow setting ipv6 traffic class") Reported-by: Jiri Benc Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 800106a7..7bfcb9a62a 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1810,10 +1810,9 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; - fl6.flowi6_tos = RT_TOS(tos); fl6.daddr = *daddr; fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr; - fl6.flowlabel = label; + fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = IPPROTO_UDP; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f3c9857..d0aeb97 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -835,6 +835,12 @@ static inline u8 ip6_tclass(__be32 flowinfo) { return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT; } + +static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel) +{ + return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel; +} + /* * Prototypes exported by ipv6 */ -- cgit v0.10.2 From 95caf6f71a9998b5382e3fb9abbca34c02e8f74f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 18 Mar 2016 18:37:58 +0100 Subject: geneve: fix populating tclass in geneve_get_v6_dst The struct flowi6's flowi6_tos is not used in IPv6 route lookup, the traffic class information is handled in the flowi6's flowlabel member instead. For example, for policy routing, fib6_rule_match() uses ip6_tclass() that is applied on the flowlabel for matching on tclass, which would currently not work as expected. Fixes: 3a56f86f1be6 ("geneve: handle ipv6 priority like ipv4 tos") Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 192631a..bc16889 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -843,8 +843,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, if (info) { fl6->daddr = info->key.u.ipv6.dst; fl6->saddr = info->key.u.ipv6.src; - fl6->flowi6_tos = RT_TOS(info->key.tos); - fl6->flowlabel = info->key.label; + fl6->flowlabel = ip6_make_flowinfo(RT_TOS(info->key.tos), + info->key.label); dst_cache = &info->dst_cache; } else { prio = geneve->tos; @@ -855,8 +855,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, use_cache = false; } - fl6->flowi6_tos = RT_TOS(prio); - fl6->flowlabel = geneve->label; + fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio), + geneve->label); fl6->daddr = geneve->remote.sin6.sin6_addr; dst_cache = &geneve->dst_cache; } @@ -1049,7 +1049,8 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (unlikely(err)) goto err; - prio = ip_tunnel_ecn_encap(fl6.flowi6_tos, iip, skb); + prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel), + iip, skb); ttl = geneve->ttl; if (!ttl && ipv6_addr_is_multicast(&fl6.daddr)) ttl = 1; -- cgit v0.10.2 From 69716a2b51aeb68fe295c0d09e26c8781eacebde Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 18 Mar 2016 18:37:59 +0100 Subject: ipv6, trace: fix tos reporting on fib6_table_lookup flowi6_tos of struct flowi6 is unused in IPv6, therefore dumping tos on that tracepoint will also give incorrect information wrt traffic class. If we want to fix it, we need to extract it via ip6_tclass(flp->flowlabel). While for the same test case I get a count of 0 non-zero tos values before the change, they now start to show up after the change: # ./perf record -e fib6:fib6_table_lookup -a sleep 10 # ./perf script | grep -v "tos 0" | wc -l 60 Since there's no user in the kernel tree anymore of flowi6_tos, remove the define to avoid any future confusion on this. Fixes: b811580d91e9 ("net: IPv6 fib lookup tracepoint") Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller diff --git a/include/net/flow.h b/include/net/flow.h index 83969ee..d47ef4b 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -127,7 +127,6 @@ struct flowi6 { #define flowi6_oif __fl_common.flowic_oif #define flowi6_iif __fl_common.flowic_iif #define flowi6_mark __fl_common.flowic_mark -#define flowi6_tos __fl_common.flowic_tos #define flowi6_scope __fl_common.flowic_scope #define flowi6_proto __fl_common.flowic_proto #define flowi6_flags __fl_common.flowic_flags @@ -135,6 +134,7 @@ struct flowi6 { #define flowi6_tun_key __fl_common.flowic_tun_key struct in6_addr daddr; struct in6_addr saddr; + /* Note: flowi6_tos is encoded in flowlabel, too. */ __be32 flowlabel; union flowi_uli uli; #define fl6_sport uli.ports.sport diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index 4cf6bac..d60096c 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -37,7 +37,7 @@ TRACE_EVENT(fib6_table_lookup, __entry->tb_id = tb_id; __entry->oif = flp->flowi6_oif; __entry->iif = flp->flowi6_iif; - __entry->tos = flp->flowi6_tos; + __entry->tos = ip6_tclass(flp->flowlabel); __entry->scope = flp->flowi6_scope; __entry->flags = flp->flowi6_flags; -- cgit v0.10.2 From 5a779c4feda5d52e0f19e48af71b75dd1032a5b8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 20 Mar 2016 17:44:56 +0000 Subject: net/mlx4: remove unused array zero_gid[] zero_gid is not used, so remove this redundant array. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 42d8de8..6aa7397 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -39,8 +39,6 @@ #include "mlx4.h" -static const u8 zero_gid[16]; /* automatically initialized to 0 */ - int mlx4_get_mgm_entry_size(struct mlx4_dev *dev) { return 1 << dev->oper_log_mgm_entry_size; -- cgit v0.10.2 From abbdb5a74cead60e20b79c960c1772955f0b6b81 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 20 Mar 2016 11:27:47 -0700 Subject: net: remove a dubious unlikely() clause TCP protocol is still used these days, and TCP uses clones in its transmit path. We can not optimize linux stack assuming it is mostly used in routers, or that TCP is dead. Fixes: 795bb1c00d ("net: bulk free infrastructure for NAPI context, use napi_consume_skb") Signed-off-by: Eric Dumazet Cc: Jesper Dangaard Brouer Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f044f97..d04c2d1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -815,7 +815,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget) trace_consume_skb(skb); /* if SKB is a clone, don't handle this case */ - if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) { + if (skb->fclone != SKB_FCLONE_UNAVAILABLE) { __kfree_skb(skb); return; } -- cgit v0.10.2 From 07b4d6a1749422fa1e054f3c2aba444acdba39e8 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 18 Mar 2016 18:39:18 -0300 Subject: sctp: do not update a_rwnd if we are not issuing a sack The SACK can be lost pretty much elsewhere, but if its allocation fail, we know we are not sending it, so it is better to revert a_rwnd to its previous value as this may give it a chance to issue a window update later. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 3c22c41..7fe56d0 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -215,10 +215,14 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } else { + __u32 old_a_rwnd = asoc->a_rwnd; + asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); - if (!sack) + if (!sack) { + asoc->a_rwnd = old_a_rwnd; goto nomem; + } asoc->peer.sack_needed = 0; asoc->peer.sack_cnt = 0; -- cgit v0.10.2 From 31b055ef0c6116a27e9a787304ecf87a77d34764 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 18 Mar 2016 18:39:19 -0300 Subject: sctp: do not leak chunks that are sent to unconfirmed paths Currently, if a chunk is scheduled to be sent through a transport that is currently unconfirmed, it will be leaked as it is dequeued from outq and is not re-queued nor freed. As I'm not aware of any situation that may lead to this situation, I'm fixing this by freeing the chunk and also logging a trace so that we can fix the other bug if it ever happens. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index f03541d..8d3d362 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -978,8 +978,12 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) (new_transport->state == SCTP_UNCONFIRMED) || (new_transport->state == SCTP_PF))) new_transport = asoc->peer.active_path; - if (new_transport->state == SCTP_UNCONFIRMED) + if (new_transport->state == SCTP_UNCONFIRMED) { + WARN_ONCE(1, "Atempt to send packet on unconfirmed path."); + sctp_chunk_fail(chunk, 0); + sctp_chunk_free(chunk); continue; + } /* Change packets if necessary. */ if (new_transport != transport) { -- cgit v0.10.2 From 3822a5ff4bc32043fa9c7b6d6f125bcdca6da39c Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Sat, 19 Mar 2016 12:17:20 -0300 Subject: sctp: align MTU to a word SCTP is a protocol that is aligned to a word (4 bytes). Thus using bare MTU can sometimes return values that are not aligned, like for loopback, which is 65536 but ipv4_mtu() limits that to 65535. This mis-alignment will cause the last non-aligned bytes to never be used and can cause issues with congestion control. So it's better to just consider a lower MTU and keep congestion control calcs saner as they are based on PMTU. Same applies to icmp frag needed messages, which is also fixed by this patch. One other effect of this is the inability to send MTU-sized packet without queueing or fragmentation and without hitting Nagle. As the check performed at sctp_packet_can_append_data(): if (chunk->skb->len + q->out_qlen >= transport->pathmtu - packet->overhead) /* Enough data queued to fill a packet */ return SCTP_XMIT_OK; with the above example of MTU, if there are no other messages queued, one cannot send a packet that just fits one packet (65532 bytes) and without causing DATA chunk fragmentation or a delay. v2: - Added WORD_TRUNC macro Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 835aa2e..ad2136c 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -82,6 +82,11 @@ #define SCTP_PROTOSW_FLAG INET_PROTOSW_PERMANENT #endif +/* Round an int up to the next multiple of 4. */ +#define WORD_ROUND(s) (((s)+3)&~3) +/* Truncate to the previous multiple of 4. */ +#define WORD_TRUNC(s) ((s)&~3) + /* * Function declarations. */ @@ -475,9 +480,6 @@ for (pos = chunk->subh.fwdtsn_hdr->skip;\ (void *)pos <= (void *)chunk->subh.fwdtsn_hdr->skip + end - sizeof(struct sctp_fwdtsn_skip);\ pos++) -/* Round an int up to the next multiple of 4. */ -#define WORD_ROUND(s) (((s)+3)&~3) - /* External references. */ extern struct proto sctp_prot; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index a19b3e6..e1849f3 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1406,7 +1406,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu(sk, t, dst_mtu(t->dst)); + sctp_transport_update_pmtu(sk, t, + WORD_TRUNC(dst_mtu(t->dst))); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/input.c b/net/sctp/input.c index db76f1a..00b8445 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -606,7 +606,8 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) /* PMTU discovery (RFC1191) */ if (ICMP_FRAG_NEEDED == code) { - sctp_icmp_frag_needed(sk, asoc, transport, info); + sctp_icmp_frag_needed(sk, asoc, transport, + WORD_TRUNC(info)); goto out_unlock; } else { if (ICMP_PROT_UNREACH == code) { diff --git a/net/sctp/transport.c b/net/sctp/transport.c index d517153..9b6b48c 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -226,7 +226,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) } if (transport->dst) { - transport->pathmtu = dst_mtu(transport->dst); + transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); } else transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } @@ -280,7 +280,7 @@ void sctp_transport_route(struct sctp_transport *transport, return; } if (transport->dst) { - transport->pathmtu = dst_mtu(transport->dst); + transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); /* Initialize sk->sk_rcv_saddr, if the transport is the * association's active path for getsockname(). -- cgit v0.10.2 From 659e0bcaebc4ca36e64eac6e9f39c1904b17472c Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Sat, 19 Mar 2016 12:17:43 -0300 Subject: sctp: keep fragmentation point aligned to word size If the user supply a different fragmentation point or if there is a network header that cause it to not be aligned, force it to be aligned. Fragmentation point at a value that is not aligned is not optimal. It causes extra padding to be used and has just no pros. v2: - Make use of the new WORD_TRUNC macro Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index ad2136c..65521cf 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -431,7 +431,7 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag); - frag = min_t(int, frag, SCTP_MAX_CHUNK_LEN); + frag = WORD_TRUNC(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); return frag; } -- cgit v0.10.2 From b8cba75bdf6a48ea4811bbefb11a94a5c7281b68 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sat, 19 Mar 2016 09:32:00 -0700 Subject: ipip: Properly mark ipip GRO packets as encapsulated. ipip encapsulated packets can be merged together by GRO but the result does not have the proper GSO type set or even marked as being encapsulated at all. Later retransmission of these packets will likely fail if the device does not support ipip offloads. This is similar to the issue resolved in IPv6 sit in feec0cb3 ("ipv6: gro: support sit protocol"). Reported-by: Patrick Boutilier Fixes: 9667e9bb ("ipip: Add gro callbacks to ipip offload") Tested-by: Patrick Boutilier Acked-by: Eric Dumazet Signed-off-by: Jesse Gross Signed-off-by: David S. Miller diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0cc923f..9659233 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1448,6 +1448,13 @@ out_unlock: return err; } +static int ipip_gro_complete(struct sk_buff *skb, int nhoff) +{ + skb->encapsulation = 1; + skb_shinfo(skb)->gso_type |= SKB_GSO_IPIP; + return inet_gro_complete(skb, nhoff); +} + int inet_ctl_sock_create(struct sock **sk, unsigned short family, unsigned short type, unsigned char protocol, struct net *net) @@ -1676,7 +1683,7 @@ static const struct net_offload ipip_offload = { .callbacks = { .gso_segment = inet_gso_segment, .gro_receive = inet_gro_receive, - .gro_complete = inet_gro_complete, + .gro_complete = ipip_gro_complete, }, }; -- cgit v0.10.2 From fac8e0f579695a3ecbc4d3cac369139d7f819971 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sat, 19 Mar 2016 09:32:01 -0700 Subject: tunnels: Don't apply GRO to multiple layers of encapsulation. When drivers express support for TSO of encapsulated packets, they only mean that they can do it for one layer of encapsulation. Supporting additional levels would mean updating, at a minimum, more IP length fields and they are unaware of this. No encapsulation device expresses support for handling offloaded encapsulated packets, so we won't generate these types of frames in the transmit path. However, GRO doesn't have a check for multiple levels of encapsulation and will attempt to build them. UDP tunnel GRO actually does prevent this situation but it only handles multiple UDP tunnels stacked on top of each other. This generalizes that solution to prevent any kind of tunnel stacking that would cause problems. Fixes: bf5a755f ("net-gre-gro: Add GRE support to the GRO stack") Signed-off-by: Jesse Gross Signed-off-by: David S. Miller diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index be693b3..f9eebd5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2096,8 +2096,8 @@ struct napi_gro_cb { /* This is non-zero if the packet may be of the same flow. */ u8 same_flow:1; - /* Used in udp_gro_receive */ - u8 udp_mark:1; + /* Used in tunnel GRO receive */ + u8 encap_mark:1; /* GRO checksum is valid */ u8 csum_valid:1; diff --git a/net/core/dev.c b/net/core/dev.c index edb7179..43c74ca 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4438,7 +4438,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; - NAPI_GRO_CB(skb)->udp_mark = 0; + NAPI_GRO_CB(skb)->encap_mark = 0; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9659233..0fefba6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1380,6 +1380,19 @@ out: return pp; } +static struct sk_buff **ipip_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->encap_mark) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + NAPI_GRO_CB(skb)->encap_mark = 1; + + return inet_gro_receive(head, skb); +} + #define SECONDS_PER_DAY 86400 /* inet_current_timestamp - Return IP network timestamp @@ -1682,7 +1695,7 @@ static struct packet_offload ip_packet_offload __read_mostly = { static const struct net_offload ipip_offload = { .callbacks = { .gso_segment = inet_gso_segment, - .gro_receive = inet_gro_receive, + .gro_receive = ipip_gro_receive, .gro_complete = ipip_gro_complete, }, }; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 540866d..dd03161 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -126,6 +126,11 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, struct packet_offload *ptype; __be16 type; + if (NAPI_GRO_CB(skb)->encap_mark) + goto out; + + NAPI_GRO_CB(skb)->encap_mark = 1; + off = skb_gro_offset(skb); hlen = off + sizeof(*greh); greh = skb_gro_header_fast(skb, off); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 8a3405a..8007f73 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -311,14 +311,14 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; - if (NAPI_GRO_CB(skb)->udp_mark || + if (NAPI_GRO_CB(skb)->encap_mark || (skb->ip_summed != CHECKSUM_PARTIAL && NAPI_GRO_CB(skb)->csum_cnt == 0 && !NAPI_GRO_CB(skb)->csum_valid)) goto out; - /* mark that this skb passed once through the udp gro layer */ - NAPI_GRO_CB(skb)->udp_mark = 1; + /* mark that this skb passed once through the tunnel gro layer */ + NAPI_GRO_CB(skb)->encap_mark = 1; rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index eeca943..82e9f30 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -258,6 +258,19 @@ out: return pp; } +static struct sk_buff **sit_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->encap_mark) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + NAPI_GRO_CB(skb)->encap_mark = 1; + + return ipv6_gro_receive(head, skb); +} + static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; @@ -302,7 +315,7 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, + .gro_receive = sit_gro_receive, .gro_complete = sit_gro_complete, }, }; -- cgit v0.10.2 From a09a4c8dd1ec7f830e1fb9e59eb72bddc965d168 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sat, 19 Mar 2016 09:32:02 -0700 Subject: tunnels: Remove encapsulation offloads on decap. If a packet is either locally encapsulated or processed through GRO it is marked with the offloads that it requires. However, when it is decapsulated these tunnel offload indications are not removed. This means that if we receive an encapsulated TCP packet, aggregate it with GRO, decapsulate, and retransmit the resulting frame on a NIC that does not support encapsulation, we won't be able to take advantage of hardware offloads even though it is just a simple TCP packet at this point. This fixes the problem by stripping off encapsulation offload indications when packets are decapsulated. The performance impacts of this bug are significant. In a test where a Geneve encapsulated TCP stream is sent to a hypervisor, GRO'ed, decapsulated, and bridged to a VM performance is improved by 60% (5Gbps->8Gbps) as a result of avoiding unnecessary segmentation at the VM tap interface. Reported-by: Ramu Ramamurthy Fixes: 68c33163 ("v4 GRE: Add TCP segmentation offload for GRE") Signed-off-by: Jesse Gross Signed-off-by: David S. Miller diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index c35dda9..56050f9 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -305,6 +305,22 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); +static inline int iptunnel_pull_offloads(struct sk_buff *skb) +{ + if (skb_is_gso(skb)) { + int err; + + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + return err; + skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >> + NETIF_F_GSO_SHIFT); + } + + skb->encapsulation = 0; + return 0; +} + static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) { if (pkt_len > 0) { diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 7804842..a0586b4 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -48,7 +48,7 @@ static inline struct fou *fou_from_sock(struct sock *sk) return sk->sk_user_data; } -static void fou_recv_pull(struct sk_buff *skb, size_t len) +static int fou_recv_pull(struct sk_buff *skb, size_t len) { struct iphdr *iph = ip_hdr(skb); @@ -59,6 +59,7 @@ static void fou_recv_pull(struct sk_buff *skb, size_t len) __skb_pull(skb, len); skb_postpull_rcsum(skb, udp_hdr(skb), len); skb_reset_transport_header(skb); + return iptunnel_pull_offloads(skb); } static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) @@ -68,9 +69,14 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) if (!fou) return 1; - fou_recv_pull(skb, sizeof(struct udphdr)); + if (fou_recv_pull(skb, sizeof(struct udphdr))) + goto drop; return -fou->protocol; + +drop: + kfree_skb(skb); + return 0; } static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, @@ -170,6 +176,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, sizeof(struct udphdr) + hdrlen); skb_reset_transport_header(skb); + if (iptunnel_pull_offloads(skb)) + goto drop; + return -guehdr->proto_ctype; drop: diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index d27276f..02dd990 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -114,7 +114,8 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, skb->vlan_tci = 0; skb_set_queue_mapping(skb, 0); skb_scrub_packet(skb, xnet); - return 0; + + return iptunnel_pull_offloads(skb); } EXPORT_SYMBOL_GPL(iptunnel_pull_header); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index f45b8ff..8338430 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -681,14 +681,16 @@ static int ipip6_rcv(struct sk_buff *skb) skb->mac_header = skb->network_header; skb_reset_network_header(skb); IPCB(skb)->flags = 0; - skb->protocol = htons(ETH_P_IPV6); + skb->dev = tunnel->dev; if (packet_is_spoofed(skb, iph, tunnel)) { tunnel->dev->stats.rx_errors++; goto out; } - __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); + if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6), + !net_eq(tunnel->net, dev_net(tunnel->dev)))) + goto out; err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { -- cgit v0.10.2 From ebf918cf9922748b9d4d9bf6a2620530cdc5a1d0 Mon Sep 17 00:00:00 2001 From: Tina Ruchandani Date: Sat, 19 Mar 2016 11:21:14 -0700 Subject: isdn: Use ktime_t instead of 'struct timeval' 'struct timeval' uses 32-bit representation for seconds which will overflow in year 2038 and beyond. mISDN/clock.c needs to compute and store elapsed time in intervals of 125 microseconds. This patch replaces the usage of 'struct timeval' with 64-bit ktime_t which is y2038 safe. The patch also replaces do_gettimeofday() (wall-clock time) with ktime_get() (monotonic time) since we only care about elapsed time here. Signed-off-by: Tina Ruchandani Suggested-by: Arnd Bergmnann Suggested-by: David Miller Signed-off-by: David S. Miller diff --git a/drivers/isdn/mISDN/clock.c b/drivers/isdn/mISDN/clock.c index 693fb7c..f8f659f 100644 --- a/drivers/isdn/mISDN/clock.c +++ b/drivers/isdn/mISDN/clock.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include "core.h" @@ -45,15 +46,15 @@ static u_int *debug; static LIST_HEAD(iclock_list); static DEFINE_RWLOCK(iclock_lock); static u16 iclock_count; /* counter of last clock */ -static struct timeval iclock_tv; /* time stamp of last clock */ -static int iclock_tv_valid; /* already received one timestamp */ +static ktime_t iclock_timestamp; /* time stamp of last clock */ +static int iclock_timestamp_valid; /* already received one timestamp */ static struct mISDNclock *iclock_current; void mISDN_init_clock(u_int *dp) { debug = dp; - do_gettimeofday(&iclock_tv); + iclock_timestamp = ktime_get(); } static void @@ -86,7 +87,7 @@ select_iclock(void) } if (bestclock != iclock_current) { /* no clock received yet */ - iclock_tv_valid = 0; + iclock_timestamp_valid = 0; } iclock_current = bestclock; } @@ -139,12 +140,11 @@ mISDN_unregister_clock(struct mISDNclock *iclock) EXPORT_SYMBOL(mISDN_unregister_clock); void -mISDN_clock_update(struct mISDNclock *iclock, int samples, struct timeval *tv) +mISDN_clock_update(struct mISDNclock *iclock, int samples, ktime_t *timestamp) { u_long flags; - struct timeval tv_now; - time_t elapsed_sec; - int elapsed_8000th; + ktime_t timestamp_now; + u16 delta; write_lock_irqsave(&iclock_lock, flags); if (iclock_current != iclock) { @@ -156,33 +156,27 @@ mISDN_clock_update(struct mISDNclock *iclock, int samples, struct timeval *tv) write_unlock_irqrestore(&iclock_lock, flags); return; } - if (iclock_tv_valid) { + if (iclock_timestamp_valid) { /* increment sample counter by given samples */ iclock_count += samples; - if (tv) { /* tv must be set, if function call is delayed */ - iclock_tv.tv_sec = tv->tv_sec; - iclock_tv.tv_usec = tv->tv_usec; - } else - do_gettimeofday(&iclock_tv); + if (timestamp) { /* timestamp must be set, if function call is delayed */ + iclock_timestamp = *timestamp; + } else { + iclock_timestamp = ktime_get(); + } } else { /* calc elapsed time by system clock */ - if (tv) { /* tv must be set, if function call is delayed */ - tv_now.tv_sec = tv->tv_sec; - tv_now.tv_usec = tv->tv_usec; - } else - do_gettimeofday(&tv_now); - elapsed_sec = tv_now.tv_sec - iclock_tv.tv_sec; - elapsed_8000th = (tv_now.tv_usec / 125) - - (iclock_tv.tv_usec / 125); - if (elapsed_8000th < 0) { - elapsed_sec -= 1; - elapsed_8000th += 8000; + if (timestamp) { /* timestamp must be set, if function call is delayed */ + timestamp_now = *timestamp; + } else { + timestamp_now = ktime_get(); } + delta = ktime_divns(ktime_sub(timestamp_now, iclock_timestamp), + (NSEC_PER_SEC / 8000)); /* add elapsed time to counter and set new timestamp */ - iclock_count += elapsed_sec * 8000 + elapsed_8000th; - iclock_tv.tv_sec = tv_now.tv_sec; - iclock_tv.tv_usec = tv_now.tv_usec; - iclock_tv_valid = 1; + iclock_count += delta; + iclock_timestamp = timestamp_now; + iclock_timestamp_valid = 1; if (*debug & DEBUG_CLOCK) printk("Received first clock from source '%s'.\n", iclock_current ? iclock_current->name : "nothing"); @@ -195,22 +189,17 @@ unsigned short mISDN_clock_get(void) { u_long flags; - struct timeval tv_now; - time_t elapsed_sec; - int elapsed_8000th; + ktime_t timestamp_now; + u16 delta; u16 count; read_lock_irqsave(&iclock_lock, flags); /* calc elapsed time by system clock */ - do_gettimeofday(&tv_now); - elapsed_sec = tv_now.tv_sec - iclock_tv.tv_sec; - elapsed_8000th = (tv_now.tv_usec / 125) - (iclock_tv.tv_usec / 125); - if (elapsed_8000th < 0) { - elapsed_sec -= 1; - elapsed_8000th += 8000; - } + timestamp_now = ktime_get(); + delta = ktime_divns(ktime_sub(timestamp_now, iclock_timestamp), + (NSEC_PER_SEC / 8000)); /* add elapsed time to counter */ - count = iclock_count + elapsed_sec * 8000 + elapsed_8000th; + count = iclock_count + delta; read_unlock_irqrestore(&iclock_lock, flags); return count; } diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 246a352..ac02c54 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -596,7 +596,7 @@ static inline struct mISDNdevice *dev_to_mISDN(struct device *dev) } extern void set_channel_address(struct mISDNchannel *, u_int, u_int); -extern void mISDN_clock_update(struct mISDNclock *, int, struct timeval *); +extern void mISDN_clock_update(struct mISDNclock *, int, ktime_t *); extern unsigned short mISDN_clock_get(void); extern const char *mISDNDevName4ch(struct mISDNchannel *); -- cgit v0.10.2 From b002fdcc89979ff342d4139d165a4e1d51e586ff Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Sat, 19 Mar 2016 21:19:55 +0000 Subject: gen_stats.c: Add description for cpu argument Function gnet_stats_copy_basic is missing the description of the cpu argument in the documentation. Adding it. Signed-off-by: Luis de Bethencourt Signed-off-by: David S. Miller diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 1e2f46a..e640462 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -140,6 +140,7 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic); /** * gnet_stats_copy_basic - copy basic statistics into statistic TLV * @d: dumping handle + * @cpu: copy statistic per cpu * @b: basic statistics * * Appends the basic statistics to the top level TLV created by -- cgit v0.10.2 From e9fc2f052c96142e50c6ddd0a5e7432af68bd13b Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Sat, 19 Mar 2016 21:31:38 +0000 Subject: net: sched: Add description for cpu_bstats argument Commit 22e0f8b9322c ("net: sched: make bstats per cpu and estimator RCU safe") added the argument cpu_bstats to functions gen_new_estimator and gen_replace_estimator and now the descriptions of these are missing for the documentation. Adding them. Signed-off-by: Luis de Bethencourt Signed-off-by: David S. Miller diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 92d886f..4573d81 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -191,6 +191,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats /** * gen_new_estimator - create a new rate estimator * @bstats: basic statistics + * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics * @stats_lock: statistics lock * @opt: rate estimator configuration TLV @@ -287,6 +288,7 @@ EXPORT_SYMBOL(gen_kill_estimator); /** * gen_replace_estimator - replace rate estimator configuration * @bstats: basic statistics + * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics * @stats_lock: statistics lock * @opt: rate estimator configuration TLV -- cgit v0.10.2 From 1c191307af5f0135b9e35975f3cef4168cefee66 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 20 Mar 2016 16:53:42 -0400 Subject: Revert "lan78xx: add ndo_get_stats64" This reverts commit a59f8c5b048dc938fb958c91c282c865cd845705. There are several bugs in this new code, for example: 1) Uses sleeping locks in get_stats64, which is not allowed, as the operation can be invoked in an atomic context. 2) Uses PM fields without CONFIG_PM or similar guards. 3) Does not synchronize HW stats when the device runtime suspends. Therefore this is being reverted until a correct version is implemented. Signed-off-by: David S. Miller diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index d36d5eb..f20890e 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3261,54 +3261,6 @@ void lan78xx_tx_timeout(struct net_device *net) tasklet_schedule(&dev->bh); } -struct rtnl_link_stats64 *lan78xx_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *storage) -{ - struct lan78xx_net *dev = netdev_priv(netdev); - struct lan78xx_statstage64 stats; - - /* curr_stat is updated by timer. - * periodic reading from HW will prevent from entering USB auto suspend. - * if autosuspend is disabled, read from HW. - */ - if (!dev->udev->dev.power.runtime_auto) - lan78xx_update_stats(dev); - - mutex_lock(&dev->stats.access_lock); - memcpy(&stats, &dev->stats.curr_stat, sizeof(stats)); - mutex_unlock(&dev->stats.access_lock); - - /* calc by driver */ - storage->rx_packets = (__u64)netdev->stats.rx_packets; - storage->tx_packets = (__u64)netdev->stats.tx_packets; - storage->rx_bytes = (__u64)netdev->stats.rx_bytes; - storage->tx_bytes = (__u64)netdev->stats.tx_bytes; - - /* use counter */ - storage->rx_length_errors = stats.rx_undersize_frame_errors + - stats.rx_oversize_frame_errors; - storage->rx_crc_errors = stats.rx_fcs_errors; - storage->rx_frame_errors = stats.rx_alignment_errors; - storage->rx_fifo_errors = stats.rx_dropped_frames; - storage->rx_over_errors = stats.rx_oversize_frame_errors; - storage->rx_errors = stats.rx_fcs_errors + - stats.rx_alignment_errors + - stats.rx_fragment_errors + - stats.rx_jabber_errors + - stats.rx_undersize_frame_errors + - stats.rx_oversize_frame_errors + - stats.rx_dropped_frames; - - storage->tx_carrier_errors = stats.tx_carrier_errors; - storage->tx_errors = stats.tx_fcs_errors + - stats.tx_excess_deferral_errors + - stats.tx_carrier_errors; - - storage->multicast = stats.rx_multicast_frames; - - return storage; -} - static const struct net_device_ops lan78xx_netdev_ops = { .ndo_open = lan78xx_open, .ndo_stop = lan78xx_stop, @@ -3322,7 +3274,6 @@ static const struct net_device_ops lan78xx_netdev_ops = { .ndo_set_features = lan78xx_set_features, .ndo_vlan_rx_add_vid = lan78xx_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = lan78xx_vlan_rx_kill_vid, - .ndo_get_stats64 = lan78xx_get_stats64, }; static void lan78xx_stat_monitor(unsigned long param) -- cgit v0.10.2 From abc34d753ee013a62595c2f7e9260e173dd083c6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 21 Mar 2016 09:30:59 +0100 Subject: net: smc911x: avoid unused variable warnings The change to use the generic DMA engine API in the smc911x driver has led to a harmless warning about unused local variables: smsc/smc911x.c: In function 'smc911x_probe': smsc/smc911x.c:1796:20: error: unused variable 'param' smsc/smc911x.c:1795:17: error: unused variable 'mask' smsc/smc911x.c:1794:26: error: unused variable 'config' This puts the variable declarations inside of the same #ifdef that protects their use. Signed-off-by: Arnd Bergmann Fixes: 79d3b59a93ba ("net: smc911x: convert pxa dma to dmaengine") Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 3f57110..a733868 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -1791,9 +1791,11 @@ static int smc911x_probe(struct net_device *dev) unsigned int val, chip_id, revision; const char *version_string; unsigned long irq_flags; +#ifdef SMC_USE_DMA struct dma_slave_config config; dma_cap_mask_t mask; struct pxad_param param; +#endif DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); -- cgit v0.10.2 From 227f33beab746aeec4ef3305bd17b1d374df09e7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 21 Mar 2016 12:02:31 +0300 Subject: mdio-sun4i: oops in error handling in probe We could end up dereferencing an error pointer when we call regulator_disable(). Fixes: 4bdcb1dd9feb ('net: Add MDIO bus driver for the Allwinner EMAC') Signed-off-by: Dan Carpenter Acked-by: Chen-Yu Tsai Signed-off-by: David S. Miller diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c index f70522c..1352965 100644 --- a/drivers/net/phy/mdio-sun4i.c +++ b/drivers/net/phy/mdio-sun4i.c @@ -122,6 +122,7 @@ static int sun4i_mdio_probe(struct platform_device *pdev) return -EPROBE_DEFER; dev_info(&pdev->dev, "no regulator found\n"); + data->regulator = NULL; } else { ret = regulator_enable(data->regulator); if (ret) @@ -137,7 +138,8 @@ static int sun4i_mdio_probe(struct platform_device *pdev) return 0; err_out_disable_regulator: - regulator_disable(data->regulator); + if (data->regulator) + regulator_disable(data->regulator); err_out_free_mdiobus: mdiobus_free(bus); return ret; -- cgit v0.10.2 From ed49e650371008b0e00c8004cc2ca93055740f78 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Mon, 21 Mar 2016 16:31:14 +0000 Subject: net: add description for len argument of dev_get_phys_port_name When the function dev_get_phys_port_name was added it missed a description for it's len argument. Adding it. Fixes: db24a9044ee1 ("net: add support for phys_port_name") Signed-off-by: Luis de Bethencourt Signed-off-by: David S. Miller diff --git a/net/core/dev.c b/net/core/dev.c index 43c74ca..b9bcbe7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6445,6 +6445,7 @@ EXPORT_SYMBOL(dev_get_phys_port_id); * dev_get_phys_port_name - Get device physical port name * @dev: device * @name: port name + * @len: limit of bytes to copy to name * * Get device physical port name */ -- cgit v0.10.2 From 5692d7ea4183b8dd5a49d73d6a4436aa22929b7b Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Mon, 21 Mar 2016 17:39:18 +0100 Subject: vxlan: fix sparse warnings Sparse reports false positives for the header manipulation inlines. Annotate them correctly. Tested by sparse on a little endian and big endian machine. Fixes: 54bfd872bf16d ("vxlan: keep flags and vni in network byte order") Reported-by: kbuild test robot Signed-off-by: Jiri Benc Signed-off-by: David S. Miller diff --git a/include/net/vxlan.h b/include/net/vxlan.h index a763c96..73ed2e9 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -271,36 +271,36 @@ static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb) static inline __be32 vxlan_vni(__be32 vni_field) { #if defined(__BIG_ENDIAN) - return vni_field >> 8; + return (__force __be32)((__force u32)vni_field >> 8); #else - return (vni_field & VXLAN_VNI_MASK) << 8; + return (__force __be32)((__force u32)(vni_field & VXLAN_VNI_MASK) << 8); #endif } static inline __be32 vxlan_vni_field(__be32 vni) { #if defined(__BIG_ENDIAN) - return vni << 8; + return (__force __be32)((__force u32)vni << 8); #else - return vni >> 8; + return (__force __be32)((__force u32)vni >> 8); #endif } static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id) { #if defined(__BIG_ENDIAN) - return tun_id; + return (__force __be32)tun_id; #else - return tun_id >> 32; + return (__force __be32)((__force u64)tun_id >> 32); #endif } static inline __be64 vxlan_vni_to_tun_id(__be32 vni) { #if defined(__BIG_ENDIAN) - return (__be64)vni; + return (__force __be64)vni; #else - return (__be64)vni << 32; + return (__force __be64)((u64)(__force u32)vni << 32); #endif } -- cgit v0.10.2 From 7d34fa75d3ee99a90ebb33c2917aa9152fb36a9c Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Mon, 21 Mar 2016 17:50:05 +0100 Subject: vxlan: fix too large pskb_may_pull with remote checksum vxlan_remcsum is called after iptunnel_pull_header and thus the skb has vxlan header already pulled. Don't include vxlan header again in the calculation. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 7bfcb9a62a..1c0fa36 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1143,7 +1143,7 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan) static bool vxlan_remcsum(struct vxlanhdr *unparsed, struct sk_buff *skb, u32 vxflags) { - size_t start, offset, plen; + size_t start, offset; if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload) goto out; @@ -1151,9 +1151,7 @@ static bool vxlan_remcsum(struct vxlanhdr *unparsed, start = vxlan_rco_start(unparsed->vx_vni); offset = start + vxlan_rco_offset(unparsed->vx_vni); - plen = sizeof(struct vxlanhdr) + offset + sizeof(u16); - - if (!pskb_may_pull(skb, plen)) + if (!pskb_may_pull(skb, offset + sizeof(u16))) return false; skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset, -- cgit v0.10.2 From c70ce028e834f8e51306217dbdbd441d851c64d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Mar 2016 09:55:10 -0700 Subject: net/rtnetlink: add IFLA_GSO_MAX_SEGS and IFLA_GSO_MAX_SIZE attributes It can be useful to report dev->gso_max_segs and dev->gso_max_size so that "ip -d link" can display them to help debugging. For the moment, these attributes are read-only. Signed-off-by: Eric Dumazet Cc: Petri Gynther Cc: Stephen Hemminger Signed-off-by: David S. Miller diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8e3f88f..76d315c 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -153,6 +153,8 @@ enum { IFLA_LINK_NETNSID, IFLA_PHYS_PORT_NAME, IFLA_PROTO_DOWN, + IFLA_GSO_MAX_SEGS, + IFLA_GSO_MAX_SIZE, __IFLA_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d2d9e5e..a69cd0c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -895,6 +895,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_PROMISCUITY */ + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ + + nla_total_size(4) /* IFLA_MAX_GSO_SEGS */ + + nla_total_size(4) /* IFLA_MAX_GSO_SIZE */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ @@ -1223,6 +1225,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_u32(skb, IFLA_GROUP, dev->group) || nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || + nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || + nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif -- cgit v0.10.2 From ae74f10068387bb0e64d8f1c8beac5e35c1458b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Mar 2016 09:55:11 -0700 Subject: bridge: update max_gso_segs and max_gso_size It can be useful to lower max_gso_segs on NIC with very low number of TX descriptors like bcmgenet. However, this is defeated by bridge since it does not propagate the lower value of max_gso_segs and max_gso_size. Signed-off-by: Eric Dumazet Cc: Petri Gynther Cc: Stephen Hemminger Signed-off-by: David S. Miller diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index a73df33..8217aec 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -437,6 +437,20 @@ int br_min_mtu(const struct net_bridge *br) return mtu; } +static void br_set_gso_limits(struct net_bridge *br) +{ + unsigned int gso_max_size = GSO_MAX_SIZE; + u16 gso_max_segs = GSO_MAX_SEGS; + const struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) { + gso_max_size = min(gso_max_size, p->dev->gso_max_size); + gso_max_segs = min(gso_max_segs, p->dev->gso_max_segs); + } + br->dev->gso_max_size = gso_max_size; + br->dev->gso_max_segs = gso_max_segs; +} + /* * Recomputes features using slave's features */ @@ -564,6 +578,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); dev_set_mtu(br->dev, br_min_mtu(br)); + br_set_gso_limits(br); kobject_uevent(&p->kobj, KOBJ_ADD); @@ -610,6 +625,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) del_nbp(p); dev_set_mtu(br->dev, br_min_mtu(br)); + br_set_gso_limits(br); spin_lock_bh(&br->lock); changed_addr = br_stp_recalculate_bridge_id(br); -- cgit v0.10.2 From 6b226e2f801253edf9d2f2afda538f550ae2b17d Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 21 Mar 2016 13:21:39 -0700 Subject: net: Fix indentation of the conf/ documentation block Commit d67ef35fff67 ("clarify documentation for net.ipv4.igmp_max_memberships") mistakenly indented a block of documentation such that it now looks like it belongs to a specific sysctl. Restore that block's original position. Cc: Jeremy Eder Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index d5df40c..d3768e8 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -946,16 +946,16 @@ igmp_max_memberships - INTEGER The value 5459 assumes no IP header options, so in practice this number may be lower. - conf/interface/* changes special settings per interface (where - "interface" is the name of your network interface) - - conf/all/* is special, changes the settings for all interfaces - igmp_qrv - INTEGER Controls the IGMP query robustness variable (see RFC2236 8.1). Default: 2 (as specified by RFC2236 8.1) Minimum: 1 (as specified by RFC6636 4.5) +conf/interface/* changes special settings per interface (where +"interface" is the name of your network interface) + +conf/all/* is special, changes the settings for all interfaces + log_martians - BOOLEAN Log packets with impossible addresses to kernel log. log_martians for the interface will be enabled if at least one of -- cgit v0.10.2 From 537377d3b766b18b9fca41628d1a9cef690b69fe Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 21 Mar 2016 13:21:40 -0700 Subject: igmp: Document sysctl_igmp_max_msf Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index d3768e8..b183e2b 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -946,10 +946,15 @@ igmp_max_memberships - INTEGER The value 5459 assumes no IP header options, so in practice this number may be lower. +igmp_max_msf - INTEGER + Maximum number of addresses allowed in the source filter list for a + multicast group. + Default: 10 + igmp_qrv - INTEGER - Controls the IGMP query robustness variable (see RFC2236 8.1). - Default: 2 (as specified by RFC2236 8.1) - Minimum: 1 (as specified by RFC6636 4.5) + Controls the IGMP query robustness variable (see RFC2236 8.1). + Default: 2 (as specified by RFC2236 8.1) + Minimum: 1 (as specified by RFC6636 4.5) conf/interface/* changes special settings per interface (where "interface" is the name of your network interface) -- cgit v0.10.2 From 6d0e24cd07f7e97015d958299fda535e459d2c99 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Mon, 21 Mar 2016 20:58:28 +0000 Subject: net: add missing descriptions in net_device_priv_flags The flags IFF_XMIT_DST_RELEASE_PERM, IFF_IPVLAN_MASTER and IFF_IPVLAN_SLAVE are missing descriptions for the Documentation. Adding them. Signed-off-by: Luis de Bethencourt Suggested-by: Benjamin Poirier Signed-off-by: David S. Miller diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f9eebd5..fd07734 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1320,6 +1320,10 @@ struct net_device_ops { * @IFF_LIVE_ADDR_CHANGE: device supports hardware address * change when it's running * @IFF_MACVLAN: Macvlan device + * @IFF_XMIT_DST_RELEASE_PERM: IFF_XMIT_DST_RELEASE not taking into account + * underlying stacked devices + * @IFF_IPVLAN_MASTER: IPvlan master device + * @IFF_IPVLAN_SLAVE: IPvlan slave device * @IFF_L3MDEV_MASTER: device is an L3 master device * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master -- cgit v0.10.2 From 62d885fe73e0032004e756b25101ac6c9ebf85dd Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 21 Mar 2016 14:08:28 -0700 Subject: net: Add missing kernel-doc for netdev ptype lists .//include/linux/netdevice.h:1826: warning: No description found for parameter 'ptype_all' .//include/linux/netdevice.h:1826: warning: No description found for parameter 'ptype_specific' Introduced by commit 7866a621043f ("dev: add per net_device packet type chains") Cc: Salam Noureddine Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fd07734..3f6385d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1418,6 +1418,8 @@ enum netdev_priv_flags { * @unreg_list: List entry, that is used, when we are unregistering the * device, see the function unregister_netdev * @close_list: List entry, that is used, when we are closing the device + * @ptype_all: Device-specific packet handlers for all protocols + * @ptype_specific: Device-specific, protocol-specific packet handlers * * @adj_list: Directly linked devices, like slaves for bonding * @all_adj_list: All linked devices, *including* neighbours -- cgit v0.10.2 From 9b246841f4041f85265dec5f769c017fc36a0d33 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 21 Mar 2016 18:37:22 -0400 Subject: Make DST_CACHE a silent config option commit 911362c70d ("net: add dst_cache support") added a new kconfig option that gets selected by other networking options. It seems the intent wasn't to offer this as a user-selectable option given the lack of help text, so this patch converts it to a silent option. Signed-off-by: Dave Jones Signed-off-by: David S. Miller diff --git a/net/Kconfig b/net/Kconfig index e134498..a8934d8 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -397,7 +397,7 @@ config LWTUNNEL with light weight tunnel state associated with fib routes. config DST_CACHE - bool "dst cache" + bool default n config NET_DEVLINK -- cgit v0.10.2 From 3ba9d300c925d89914d15beff2180064ac7ee6f6 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 21 Mar 2016 18:21:26 -0700 Subject: net: ipv4: Fix truncated timestamp returned by inet_current_timestamp() The millisecond timestamps returned by the function is converted to network byte order by making a call to htons(). htons() only returns __be16 while __be32 is required here. This was identified by the sparse warning from the buildbot: net/ipv4/af_inet.c:1405:16: sparse: incorrect type in return expression (different base types) net/ipv4/af_inet.c:1405:16: expected restricted __be32 net/ipv4/af_inet.c:1405:16: got restricted __be16 [usertype] Change the function to use htonl() to return the correct __be32 type instead so that the millisecond value doesn't get truncated. Signed-off-by: Deepa Dinamani Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Cc: James Morris Cc: Patrick McHardy Cc: Arnd Bergmann Fixes: 822c868532ca ("net: ipv4: Convert IP network timestamps to be y2038 safe") Reported-by: Fengguang Wu [0-day test robot] Signed-off-by: David S. Miller diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0fefba6..9e48199 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1415,7 +1415,7 @@ __be32 inet_current_timestamp(void) msecs += (u32)ts.tv_nsec / NSEC_PER_MSEC; /* Convert to network byte order. */ - return htons(msecs); + return htonl(msecs); } EXPORT_SYMBOL(inet_current_timestamp); -- cgit v0.10.2 From 5f2d472450c94402627f54b603d271076f3f97c2 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Mon, 21 Mar 2016 10:15:34 -0700 Subject: ethtool: minor doc update Updates: commit 793cf87de9d1 ("ethtool: Set cmd field in ETHTOOL_GLINKSETTINGS response to wrong nwords") Signed-off-by: David Decotigny Signed-off-by: David S. Miller diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 2835b07..9222db8 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1648,9 +1648,9 @@ enum ethtool_reset_flags { * %ETHTOOL_GLINKSETTINGS: on entry, number of words passed by user * (>= 0); on return, if handshake in progress, negative if * request size unsupported by kernel: absolute value indicates - * kernel recommended size and cmd field is 0, as well as all the - * other fields; otherwise (handshake completed), strictly - * positive to indicate size used by kernel and cmd field is + * kernel expected size and all the other fields but cmd + * are 0; otherwise (handshake completed), strictly positive + * to indicate size used by kernel and cmd field stays * %ETHTOOL_GLINKSETTINGS, all other fields populated by driver. For * %ETHTOOL_SLINKSETTINGS: must be valid on entry, ie. a positive * value returned previously by %ETHTOOL_GLINKSETTINGS, otherwise -- cgit v0.10.2 From 025c68186e07afaededa84143f1a22f273cd3f67 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Mon, 21 Mar 2016 10:15:35 -0700 Subject: netlink: add support for NIC driver ioctls By returning -ENOIOCTLCMD, sock_do_ioctl() falls back to calling dev_ioctl(), which provides support for NIC driver ioctls, which includes ethtool support. This is similar to the way ioctls are handled in udp.c or tcp.c. This removes the requirement that ethtool for example be tied to the support of a specific L3 protocol (ethtool uses an AF_INET socket today). Signed-off-by: David Decotigny Signed-off-by: David S. Miller diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c841679..215fc08 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1033,6 +1033,14 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, return 0; } +static int netlink_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + /* try to hand this ioctl down to the NIC drivers. + */ + return -ENOIOCTLCMD; +} + static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) { struct sock *sock; @@ -2494,7 +2502,7 @@ static const struct proto_ops netlink_ops = { .accept = sock_no_accept, .getname = netlink_getname, .poll = datagram_poll, - .ioctl = sock_no_ioctl, + .ioctl = netlink_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = netlink_setsockopt, -- cgit v0.10.2 From db219baf19a7024ec1db71f7c3c2663d49de1818 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Mon, 21 Mar 2016 17:37:28 +0000 Subject: ipv6: remove unused in6_addr struct struct in6_addr isn't used anymore in inet6_connection_sock.h, removing the forward declaration. Fixes: 1b33bc3e9e90 ("ipv6: remove obsolete inet6 functions") Signed-off-by: Luis de Bethencourt Signed-off-by: David S. Miller diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 064cfbe..954ad6b 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -15,7 +15,6 @@ #include -struct in6_addr; struct inet_bind_bucket; struct request_sock; struct sk_buff; -- cgit v0.10.2 From 0d6b425a3773c3445b0f51b2f333821beaacb619 Mon Sep 17 00:00:00 2001 From: Kejian Yan Date: Tue, 22 Mar 2016 16:06:22 +0800 Subject: net: hns: bug fix about ping6 The current upstreaming code fails to ping other IPv6 net device, because the enet receives the multicast packets with the src mac addr which is the same as its mac addr. These packets need to be dropped. Signed-off-by: Kejian Yan Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 3f77ff7..ef84bd7 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -564,6 +564,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, struct sk_buff *skb; struct hnae_desc *desc; struct hnae_desc_cb *desc_cb; + struct ethhdr *eh; unsigned char *va; int bnum, length, i; int pull_len; @@ -670,6 +671,14 @@ out_bnum_err: return -EFAULT; } + /* filter out multicast pkt with the same src mac as this port */ + eh = eth_hdr(skb); + if (unlikely(is_multicast_ether_addr(eh->h_dest) && + ether_addr_equal(ndev->dev_addr, eh->h_source))) { + dev_kfree_skb_any(skb); + return -EFAULT; + } + ring->stats.rx_pkts++; ring->stats.rx_bytes += skb->len; -- cgit v0.10.2 From f8a1a636c2a940d37dba9153bcfe687a802745fb Mon Sep 17 00:00:00 2001 From: Sheng Li Date: Tue, 22 Mar 2016 16:06:23 +0800 Subject: net: hns: fixed portid bug in sending manage pkt In chip V2, the default value of port id in tx BD is Zero. If it is not configurated to the other value, all management packets will be sent out from port0. So port_id in the tx BD needs to be updated when sending a management packet. In V2 chip, when sending mamagement packets, the driver should config the port id to BD descs. Signed-off-by: Sheng Li Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index 1cbcb9f..37d0cce 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -147,6 +147,8 @@ enum hnae_led_state { #define HNSV2_TXD_BUFNUM_S 0 #define HNSV2_TXD_BUFNUM_M (0x7 << HNSV2_TXD_BUFNUM_S) +#define HNSV2_TXD_PORTID_S 4 +#define HNSV2_TXD_PORTID_M (0X7 << HNSV2_TXD_PORTID_S) #define HNSV2_TXD_RI_B 1 #define HNSV2_TXD_L4CS_B 2 #define HNSV2_TXD_L3CS_B 3 @@ -516,6 +518,7 @@ struct hnae_handle { int q_num; int vf_id; u32 eport_id; + u32 dport_id; /* v2 tx bd should fill the dport_id */ enum hnae_port_type port_type; struct list_head node; /* list to hnae_ae_dev->handle_list */ struct hnae_buf_ops *bops; /* operation for the buffer */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index d4f92ed..90352d6 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -175,6 +175,7 @@ struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev, ae_handle->phy_node = vf_cb->mac_cb->phy_node; ae_handle->if_support = vf_cb->mac_cb->if_support; ae_handle->port_type = vf_cb->mac_cb->mac_type; + ae_handle->dport_id = port_idx; return ae_handle; vf_id_err: diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index ef84bd7..ef517af 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -66,10 +66,14 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, desc->addr = cpu_to_le64(dma); desc->tx.send_size = cpu_to_le16((u16)size); - /*config bd buffer end */ + /* config bd buffer end */ hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1); hnae_set_field(bn_pid, HNSV2_TXD_BUFNUM_M, 0, buf_num - 1); + /* fill port_id in the tx bd for sending management pkts */ + hnae_set_field(bn_pid, HNSV2_TXD_PORTID_M, + HNSV2_TXD_PORTID_S, ring->q->handle->dport_id); + if (type == DESC_TYPE_SKB) { skb = (struct sk_buff *)priv; -- cgit v0.10.2 From d5679849d134704bd9f9e95d2370eb60abf37ed5 Mon Sep 17 00:00:00 2001 From: Kejian Yan Date: Tue, 22 Mar 2016 16:06:24 +0800 Subject: net: hns: add uc match for debug ports Debug ports receives lots of packets with dest mac addr does not match local mac addr, because the filter is close, and it does not drop the useless packets. This patch adds ON/OFF switch of filtering the packets whose dest mac addr do not match the local addr in mac table. And the switch is ON in initialization. Signed-off-by: Kejian Yan Signed-off-by: Peng Li Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index 90352d6..a7427b8 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -420,7 +420,10 @@ static int hns_ae_set_autoneg(struct hnae_handle *handle, u8 enable) static void hns_ae_set_promisc_mode(struct hnae_handle *handle, u32 en) { + struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); + hns_dsaf_set_promisc_mode(hns_ae_get_dsaf_dev(handle->dev), en); + hns_mac_set_promisc(mac_cb, (u8)!!en); } static int hns_ae_get_autoneg(struct hnae_handle *handle) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index b8517b0..b8cf0e4 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -290,6 +290,24 @@ static int hns_gmac_adjust_link(void *mac_drv, enum mac_speed speed, return 0; } +static void hns_gmac_set_uc_match(void *mac_drv, u16 en) +{ + struct mac_driver *drv = mac_drv; + + dsaf_set_dev_bit(drv, GMAC_REC_FILT_CONTROL_REG, + GMAC_UC_MATCH_EN_B, !en); + dsaf_set_dev_bit(drv, GMAC_STATION_ADDR_HIGH_2_REG, + GMAC_ADDR_EN_B, !en); +} + +static void hns_gmac_set_promisc(void *mac_drv, u8 en) +{ + struct mac_driver *drv = mac_drv; + + if (drv->mac_cb->mac_type == HNAE_PORT_DEBUG) + hns_gmac_set_uc_match(mac_drv, en); +} + static void hns_gmac_init(void *mac_drv) { u32 port; @@ -305,6 +323,8 @@ static void hns_gmac_init(void *mac_drv) mdelay(10); hns_gmac_disable(mac_drv, MAC_COMM_MODE_RX_AND_TX); hns_gmac_tx_loop_pkt_dis(mac_drv); + if (drv->mac_cb->mac_type == HNAE_PORT_DEBUG) + hns_gmac_set_uc_match(mac_drv, 0); } void hns_gmac_update_stats(void *mac_drv) @@ -407,8 +427,13 @@ static void hns_gmac_set_mac_addr(void *mac_drv, char *mac_addr) u32 low_val = mac_addr[5] | (mac_addr[4] << 8) | (mac_addr[3] << 16) | (mac_addr[2] << 24); + + u32 val = dsaf_read_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG); + u32 sta_addr_en = dsaf_get_bit(val, GMAC_ADDR_EN_B); + dsaf_write_dev(drv, GMAC_STATION_ADDR_LOW_2_REG, low_val); - dsaf_write_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG, high_val); + dsaf_write_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG, + high_val | (sta_addr_en << GMAC_ADDR_EN_B)); } } @@ -699,6 +724,7 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param) mac_drv->get_sset_count = hns_gmac_get_sset_count; mac_drv->get_strings = hns_gmac_get_strings; mac_drv->update_stats = hns_gmac_update_stats; + mac_drv->set_promiscuous = hns_gmac_set_promisc; return (void *)mac_drv; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 5ef0e96..138737d 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -861,6 +861,14 @@ int hns_mac_get_sset_count(struct hns_mac_cb *mac_cb, int stringset) return mac_ctrl_drv->get_sset_count(stringset); } +void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en) +{ + struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb); + + if (mac_ctrl_drv->set_promiscuous) + mac_ctrl_drv->set_promiscuous(mac_ctrl_drv, en); +} + int hns_mac_get_regs_count(struct hns_mac_cb *mac_cb) { struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index 0b05219..0f60968 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -365,7 +365,7 @@ struct mac_driver { /*config rx pause enable*/ void (*set_rx_ignore_pause_frames)(void *mac_drv, u32 enable); /* config rx mode for promiscuous*/ - int (*set_promiscuous)(void *mac_drv, u8 enable); + void (*set_promiscuous)(void *mac_drv, u8 enable); /* get mac id */ void (*mac_get_id)(void *mac_drv, u8 *mac_id); void (*mac_pausefrm_cfg)(void *mac_drv, u32 rx_en, u32 tx_en); @@ -453,4 +453,6 @@ int hns_mac_get_regs_count(struct hns_mac_cb *mac_cb); void hns_set_led_opt(struct hns_mac_cb *mac_cb); int hns_cpld_led_set_id(struct hns_mac_cb *mac_cb, enum hnae_led_state status); +void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en); + #endif /* _HNS_DSAF_MAC_H */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index 60d695d..bf62687 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -922,6 +922,8 @@ #define GMAC_LP_REG_CF2MI_LP_EN_B 2 #define GMAC_MODE_CHANGE_EB_B 0 +#define GMAC_UC_MATCH_EN_B 0 +#define GMAC_ADDR_EN_B 16 #define GMAC_RECV_CTRL_STRIP_PAD_EN_B 3 #define GMAC_RECV_CTRL_RUNT_PKT_EN_B 4 -- cgit v0.10.2 From a52047770f2720c9a3d49e2c95a269ef29463415 Mon Sep 17 00:00:00 2001 From: Sheng Li Date: Tue, 22 Mar 2016 16:06:25 +0800 Subject: net: hns: fixed the bug about GMACs mac setting When sending a pause frame out from GMACs, the packets' source MAC address does not match the GMACs' MAC address. It causes by the condition before the mac address setting routine for GMACs, the mac address cannot be set into loacal mac table for service ports. It obviously the condition needs to be deleted. Signed-off-by: Sheng Li Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index b8cf0e4..6e2b76e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -422,19 +422,17 @@ static void hns_gmac_set_mac_addr(void *mac_drv, char *mac_addr) { struct mac_driver *drv = (struct mac_driver *)mac_drv; - if (drv->mac_id >= DSAF_SERVICE_NW_NUM) { - u32 high_val = mac_addr[1] | (mac_addr[0] << 8); + u32 high_val = mac_addr[1] | (mac_addr[0] << 8); - u32 low_val = mac_addr[5] | (mac_addr[4] << 8) - | (mac_addr[3] << 16) | (mac_addr[2] << 24); + u32 low_val = mac_addr[5] | (mac_addr[4] << 8) + | (mac_addr[3] << 16) | (mac_addr[2] << 24); - u32 val = dsaf_read_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG); - u32 sta_addr_en = dsaf_get_bit(val, GMAC_ADDR_EN_B); + u32 val = dsaf_read_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG); + u32 sta_addr_en = dsaf_get_bit(val, GMAC_ADDR_EN_B); - dsaf_write_dev(drv, GMAC_STATION_ADDR_LOW_2_REG, low_val); - dsaf_write_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG, - high_val | (sta_addr_en << GMAC_ADDR_EN_B)); - } + dsaf_write_dev(drv, GMAC_STATION_ADDR_LOW_2_REG, low_val); + dsaf_write_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG, + high_val | (sta_addr_en << GMAC_ADDR_EN_B)); } static int hns_gmac_config_loopback(void *mac_drv, enum hnae_loop loop_mode, -- cgit v0.10.2 From 6f80563c03524c658bb4788fa0973f43108670c4 Mon Sep 17 00:00:00 2001 From: Qianqian Xie Date: Tue, 22 Mar 2016 16:06:26 +0800 Subject: net: hns: set xge statistic reg as read only As the user manual of HNS V2 describs, XGE_DFX_CTRL_CFG.xge_dfx_ctrl_cfg should be configed as zero if we want xge statistic reg to be read only. But HNS V1 gets the other meanings. It needs to be identified the process and then config it rightly. Signed-off-by: Qianqian Xie Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 38fc5be..5c1ac9b 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -748,8 +748,9 @@ static void hns_dsaf_tbl_stat_en(struct dsaf_device *dsaf_dev) */ static void hns_dsaf_rocee_bp_en(struct dsaf_device *dsaf_dev) { - dsaf_set_dev_bit(dsaf_dev, DSAF_XGE_CTRL_SIG_CFG_0_REG, - DSAF_FC_XGE_TX_PAUSE_S, 1); + if (AE_IS_VER1(dsaf_dev->dsaf_ver)) + dsaf_set_dev_bit(dsaf_dev, DSAF_XGE_CTRL_SIG_CFG_0_REG, + DSAF_FC_XGE_TX_PAUSE_S, 1); } /* set msk for dsaf exception irq*/ -- cgit v0.10.2 From beecfe9e269574973ea1b37472dc81dd0364d012 Mon Sep 17 00:00:00 2001 From: Kejian Yan Date: Tue, 22 Mar 2016 16:06:27 +0800 Subject: net: hns: fix return value of the function about rss Both .get_rxfh and .set_rxfh are always return 0, it should return result from hardware when getting or setting rss. And the rss function should return the correct data type. Signed-off-by: Kejian Yan Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index a7427b8..648b31a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -803,7 +803,7 @@ static int hns_ae_set_rss(struct hnae_handle *handle, const u32 *indir, /* set the RSS Hash Key if specififed by the user */ if (key) - hns_ppe_set_rss_key(ppe_cb, (int *)key); + hns_ppe_set_rss_key(ppe_cb, (u32 *)key); /* update the shadow RSS table with user specified qids */ memcpy(ppe_cb->rss_indir_table, indir, HNS_PPEV2_RSS_IND_TBL_SIZE); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index f302ef9..06422c2 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -27,7 +27,7 @@ void hns_ppe_set_tso_enable(struct hns_ppe_cb *ppe_cb, u32 value) void hns_ppe_set_rss_key(struct hns_ppe_cb *ppe_cb, const u32 rss_key[HNS_PPEV2_RSS_KEY_NUM]) { - int key_item = 0; + u32 key_item; for (key_item = 0; key_item < HNS_PPEV2_RSS_KEY_NUM; key_item++) dsaf_write_dev(ppe_cb, PPEV2_RSS_KEY_REG + key_item * 0x4, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 3c4a3bc..2229905 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -1173,18 +1173,15 @@ hns_get_rss_key_size(struct net_device *netdev) { struct hns_nic_priv *priv = netdev_priv(netdev); struct hnae_ae_ops *ops; - u32 ret; if (AE_IS_VER1(priv->enet_ver)) { netdev_err(netdev, "RSS feature is not supported on this hardware\n"); - return -EOPNOTSUPP; + return 0; } ops = priv->ae_handle->dev->ops; - ret = ops->get_rss_key_size(priv->ae_handle); - - return ret; + return ops->get_rss_key_size(priv->ae_handle); } static u32 @@ -1192,18 +1189,15 @@ hns_get_rss_indir_size(struct net_device *netdev) { struct hns_nic_priv *priv = netdev_priv(netdev); struct hnae_ae_ops *ops; - u32 ret; if (AE_IS_VER1(priv->enet_ver)) { netdev_err(netdev, "RSS feature is not supported on this hardware\n"); - return -EOPNOTSUPP; + return 0; } ops = priv->ae_handle->dev->ops; - ret = ops->get_rss_indir_size(priv->ae_handle); - - return ret; + return ops->get_rss_indir_size(priv->ae_handle); } static int @@ -1211,7 +1205,6 @@ hns_get_rss(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { struct hns_nic_priv *priv = netdev_priv(netdev); struct hnae_ae_ops *ops; - int ret; if (AE_IS_VER1(priv->enet_ver)) { netdev_err(netdev, @@ -1224,9 +1217,7 @@ hns_get_rss(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) if (!indir) return 0; - ret = ops->get_rss(priv->ae_handle, indir, key, hfunc); - - return 0; + return ops->get_rss(priv->ae_handle, indir, key, hfunc); } static int @@ -1235,7 +1226,6 @@ hns_set_rss(struct net_device *netdev, const u32 *indir, const u8 *key, { struct hns_nic_priv *priv = netdev_priv(netdev); struct hnae_ae_ops *ops; - int ret; if (AE_IS_VER1(priv->enet_ver)) { netdev_err(netdev, @@ -1252,9 +1242,7 @@ hns_set_rss(struct net_device *netdev, const u32 *indir, const u8 *key, if (!indir) return 0; - ret = ops->set_rss(priv->ae_handle, indir, key, hfunc); - - return 0; + return ops->set_rss(priv->ae_handle, indir, key, hfunc); } static struct ethtool_ops hns_ethtool_ops = { -- cgit v0.10.2 From 717dd807383b074c1ff3852a05ea5d1289827993 Mon Sep 17 00:00:00 2001 From: Kejian Yan Date: Tue, 22 Mar 2016 16:06:28 +0800 Subject: net: hns: fixes a bug of RSS If trying to get receive flow hash indirection table by ethtool, it needs to call .get_rxnfc to get ring number first. So this patch implements the .get_rxnfc of ethtool. And the data type of rss_indir_table is u32, it has to be multiply by the width of data type when using memcpy. Signed-off-by: Kejian Yan Signed-off-by: Yisen Zhuang Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index 648b31a..285c893 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -791,7 +791,8 @@ static int hns_ae_get_rss(struct hnae_handle *handle, u32 *indir, u8 *key, memcpy(key, ppe_cb->rss_key, HNS_PPEV2_RSS_KEY_SIZE); /* update the current hash->queue mappings from the shadow RSS table */ - memcpy(indir, ppe_cb->rss_indir_table, HNS_PPEV2_RSS_IND_TBL_SIZE); + memcpy(indir, ppe_cb->rss_indir_table, + HNS_PPEV2_RSS_IND_TBL_SIZE * sizeof(*indir)); return 0; } @@ -806,7 +807,8 @@ static int hns_ae_set_rss(struct hnae_handle *handle, const u32 *indir, hns_ppe_set_rss_key(ppe_cb, (u32 *)key); /* update the shadow RSS table with user specified qids */ - memcpy(ppe_cb->rss_indir_table, indir, HNS_PPEV2_RSS_IND_TBL_SIZE); + memcpy(ppe_cb->rss_indir_table, indir, + HNS_PPEV2_RSS_IND_TBL_SIZE * sizeof(*indir)); /* now update the hardware */ hns_ppe_set_indir_table(ppe_cb, ppe_cb->rss_indir_table); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 2229905..9c3ba65 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -1245,6 +1245,23 @@ hns_set_rss(struct net_device *netdev, const u32 *indir, const u8 *key, return ops->set_rss(priv->ae_handle, indir, key, hfunc); } +static int hns_get_rxnfc(struct net_device *netdev, + struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = priv->ae_handle->q_num; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + static struct ethtool_ops hns_ethtool_ops = { .get_drvinfo = hns_nic_get_drvinfo, .get_link = hns_nic_get_link, @@ -1268,6 +1285,7 @@ static struct ethtool_ops hns_ethtool_ops = { .get_rxfh_indir_size = hns_get_rss_indir_size, .get_rxfh = hns_get_rss, .set_rxfh = hns_set_rss, + .get_rxnfc = hns_get_rxnfc, }; void hns_ethtool_set_ops(struct net_device *ndev) -- cgit v0.10.2 From da3488bbde50c274a327afa67a141c86c065e0fa Mon Sep 17 00:00:00 2001 From: Kejian Yan Date: Tue, 22 Mar 2016 16:06:29 +0800 Subject: net: hns: fix the bug about mtu setting In chip V1, the maximum mtu value is 9600. But in chip V2, it is 9728. And it is always configurates as 9600 before this patch. Signed-off-by: Kejian Yan Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 138737d..50237fb 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -467,8 +467,10 @@ int hns_mac_set_mtu(struct hns_mac_cb *mac_cb, u32 new_mtu) struct mac_driver *drv = hns_mac_get_drv(mac_cb); u32 buf_size = mac_cb->dsaf_dev->buf_size; u32 new_frm = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + u32 max_frm = AE_IS_VER1(mac_cb->dsaf_dev->dsaf_ver) ? + MAC_MAX_MTU : MAC_MAX_MTU_V2; - if ((new_mtu < MAC_MIN_MTU) || (new_frm > MAC_MAX_MTU) || + if ((new_mtu < MAC_MIN_MTU) || (new_frm > max_frm) || (new_frm > HNS_RCB_RING_MAX_BD_PER_PKT * buf_size)) return -EINVAL; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index 0f60968..7b47701 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -26,6 +26,7 @@ struct dsaf_device; #define MAC_DEFAULT_MTU (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN + ETH_DATA_LEN) #define MAC_MAX_MTU 9600 +#define MAC_MAX_MTU_V2 9728 #define MAC_MIN_MTU 68 #define MAC_DEFAULT_PAUSE_TIME 0xff diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index 06422c2..5b7ae5f 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -343,6 +343,9 @@ static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb) if (!AE_IS_VER1(dsaf_dev->dsaf_ver)) { hns_ppe_set_vlan_strip(ppe_cb, 0); + dsaf_write_dev(ppe_cb, PPE_CFG_MAX_FRAME_LEN_REG, + HNS_PPEV2_MAX_FRAME_LEN); + /* set default RSS key in h/w */ hns_ppe_set_rss_key(ppe_cb, ppe_cb->rss_key); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h index 0f5cb69..e9c0ec2 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h @@ -30,6 +30,8 @@ #define HNS_PPEV2_RSS_KEY_SIZE 40 /* in bytes or 320 bits */ #define HNS_PPEV2_RSS_KEY_NUM (HNS_PPEV2_RSS_KEY_SIZE / sizeof(u32)) +#define HNS_PPEV2_MAX_FRAME_LEN 0X980 + enum ppe_qid_mode { PPE_QID_MODE0 = 0, /* fixed queue id mode */ PPE_QID_MODE1, /* switch:128VM non switch:6Port/4VM/4TC */ -- cgit v0.10.2 From 211b1384030d3b94f962573f9bc5e5726bf79197 Mon Sep 17 00:00:00 2001 From: Kejian Yan Date: Tue, 22 Mar 2016 16:06:30 +0800 Subject: net: hns: adds limitation for debug port mtu If mtu for debug port is set more than 1500, it may cause that packets are dropped by ppe. So maximum value for debug port should be 1500. Signed-off-by: Kejian Yan Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 50237fb..a38084a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -470,6 +470,9 @@ int hns_mac_set_mtu(struct hns_mac_cb *mac_cb, u32 new_mtu) u32 max_frm = AE_IS_VER1(mac_cb->dsaf_dev->dsaf_ver) ? MAC_MAX_MTU : MAC_MAX_MTU_V2; + if (mac_cb->mac_type == HNAE_PORT_DEBUG) + max_frm = MAC_MAX_MTU_DBG; + if ((new_mtu < MAC_MIN_MTU) || (new_frm > max_frm) || (new_frm > HNS_RCB_RING_MAX_BD_PER_PKT * buf_size)) return -EINVAL; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index 7b47701..823b6e7 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -28,6 +28,7 @@ struct dsaf_device; #define MAC_MAX_MTU 9600 #define MAC_MAX_MTU_V2 9728 #define MAC_MIN_MTU 68 +#define MAC_MAX_MTU_DBG MAC_DEFAULT_MTU #define MAC_DEFAULT_PAUSE_TIME 0xff -- cgit v0.10.2 From 0b51b1dc7920bb722ee74dab4bb3b8799779e9b8 Mon Sep 17 00:00:00 2001 From: Daode Huang Date: Tue, 22 Mar 2016 16:06:31 +0800 Subject: net: hns: bug fix about the overflow of mss When set MTU to the minimum value 68, there are increasing number of error packets occur, which is caused by the overflowed value of mss. This patch fix the bug. Signed-off-by: Daode Huang Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index ef517af..71aa37b 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -48,7 +48,6 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, struct iphdr *iphdr; struct ipv6hdr *ipv6hdr; struct sk_buff *skb; - int skb_tmp_len; __be16 protocol; u8 bn_pid = 0; u8 rrcfv = 0; @@ -94,13 +93,13 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, hnae_set_bit(rrcfv, HNSV2_TXD_L4CS_B, 1); /* check for tcp/udp header */ - if (iphdr->protocol == IPPROTO_TCP) { + if (iphdr->protocol == IPPROTO_TCP && + skb_is_gso(skb)) { hnae_set_bit(tvsvsn, HNSV2_TXD_TSE_B, 1); - skb_tmp_len = SKB_TMP_LEN(skb); l4_len = tcp_hdrlen(skb); - mss = mtu - skb_tmp_len - ETH_FCS_LEN; - paylen = skb->len - skb_tmp_len; + mss = skb_shinfo(skb)->gso_size; + paylen = skb->len - SKB_TMP_LEN(skb); } } else if (skb->protocol == htons(ETH_P_IPV6)) { hnae_set_bit(tvsvsn, HNSV2_TXD_IPV6_B, 1); @@ -108,13 +107,13 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, hnae_set_bit(rrcfv, HNSV2_TXD_L4CS_B, 1); /* check for tcp/udp header */ - if (ipv6hdr->nexthdr == IPPROTO_TCP) { + if (ipv6hdr->nexthdr == IPPROTO_TCP && + skb_is_gso(skb) && skb_is_gso_v6(skb)) { hnae_set_bit(tvsvsn, HNSV2_TXD_TSE_B, 1); - skb_tmp_len = SKB_TMP_LEN(skb); l4_len = tcp_hdrlen(skb); - mss = mtu - skb_tmp_len - ETH_FCS_LEN; - paylen = skb->len - skb_tmp_len; + mss = skb_shinfo(skb)->gso_size; + paylen = skb->len - SKB_TMP_LEN(skb); } } desc->tx.ip_offset = ip_offset; -- cgit v0.10.2 From ad0ea1989cc4d5905941d0a9e62c63ad6d859cef Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 22 Mar 2016 09:19:38 +0100 Subject: ipv4: fix broadcast packets reception Currently, ingress ipv4 broadcast datagrams are dropped since, in udp_v4_early_demux(), ip_check_mc_rcu() is invoked even on bcast packets. This patch addresses the issue, invoking ip_check_mc_rcu() only for mcast packets. Fixes: 6e5403093261 ("ipv4/udp: Verify multicast group is ours in upd_v4_early_demux()") Signed-off-by: Paolo Abeni Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 836abe5..08eed5e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2070,10 +2070,14 @@ void udp_v4_early_demux(struct sk_buff *skb) if (!in_dev) return; - ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, - iph->protocol); - if (!ours) - return; + /* we are supposed to accept bcast packets */ + if (skb->pkt_type == PACKET_MULTICAST) { + ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, + iph->protocol); + if (!ours) + return; + } + sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, dif); } else if (skb->pkt_type == PACKET_HOST) { -- cgit v0.10.2 From 6e9bdc7271ac8e2af58a2c9a87551d9bd49337a1 Mon Sep 17 00:00:00 2001 From: Igal Liberman Date: Mon, 21 Mar 2016 23:08:11 +0200 Subject: fsl/fman: Workaround for Errata A-007273 Errata A-007273 (For FMan V3 devices only): FMan soft reset is not finished properly if one of the Ethernet MAC clocks is disabled Workaround: Re-enable all disabled MAC clocks through the DCFG_CCSR_DEVDISR2 register prior to issuing an FMAN soft reset. Re-disable the MAC clocks after the FMAN soft reset is done. Signed-off-by: Igal Liberman Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index 79a210a..ea83712 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -35,6 +35,7 @@ #include "fman.h" #include "fman_muram.h" +#include #include #include #include @@ -1871,6 +1872,90 @@ err_fm_state: return -EINVAL; } +static int fman_reset(struct fman *fman) +{ + u32 count; + int err = 0; + + if (fman->state->rev_info.major < 6) { + iowrite32be(FPM_RSTC_FM_RESET, &fman->fpm_regs->fm_rstc); + /* Wait for reset completion */ + count = 100; + do { + udelay(1); + } while (((ioread32be(&fman->fpm_regs->fm_rstc)) & + FPM_RSTC_FM_RESET) && --count); + if (count == 0) + err = -EBUSY; + + goto _return; + } else { + struct device_node *guts_node; + struct ccsr_guts __iomem *guts_regs; + u32 devdisr2, reg; + + /* Errata A007273 */ + guts_node = + of_find_compatible_node(NULL, NULL, + "fsl,qoriq-device-config-2.0"); + if (!guts_node) { + dev_err(fman->dev, "%s: Couldn't find guts node\n", + __func__); + goto guts_node; + } + + guts_regs = of_iomap(guts_node, 0); + if (!guts_regs) { + dev_err(fman->dev, "%s: Couldn't map %s regs\n", + __func__, guts_node->full_name); + goto guts_regs; + } +#define FMAN1_ALL_MACS_MASK 0xFCC00000 +#define FMAN2_ALL_MACS_MASK 0x000FCC00 + /* Read current state */ + devdisr2 = ioread32be(&guts_regs->devdisr2); + if (fman->dts_params.id == 0) + reg = devdisr2 & ~FMAN1_ALL_MACS_MASK; + else + reg = devdisr2 & ~FMAN2_ALL_MACS_MASK; + + /* Enable all MACs */ + iowrite32be(reg, &guts_regs->devdisr2); + + /* Perform FMan reset */ + iowrite32be(FPM_RSTC_FM_RESET, &fman->fpm_regs->fm_rstc); + + /* Wait for reset completion */ + count = 100; + do { + udelay(1); + } while (((ioread32be(&fman->fpm_regs->fm_rstc)) & + FPM_RSTC_FM_RESET) && --count); + if (count == 0) { + iounmap(guts_regs); + of_node_put(guts_node); + err = -EBUSY; + goto _return; + } + + /* Restore devdisr2 value */ + iowrite32be(devdisr2, &guts_regs->devdisr2); + + iounmap(guts_regs); + of_node_put(guts_node); + + goto _return; + +guts_regs: + of_node_put(guts_node); +guts_node: + dev_dbg(fman->dev, "%s: Didn't perform FManV3 reset due to Errata A007273!\n", + __func__); + } +_return: + return err; +} + static int fman_init(struct fman *fman) { struct fman_cfg *cfg = NULL; @@ -1914,22 +1999,9 @@ static int fman_init(struct fman *fman) fman->liodn_base[i] = liodn_base; } - /* FMan Reset (supported only for FMan V2) */ - if (fman->state->rev_info.major >= 6) { - /* Errata A007273 */ - dev_dbg(fman->dev, "%s: FManV3 reset is not supported!\n", - __func__); - } else { - iowrite32be(FPM_RSTC_FM_RESET, &fman->fpm_regs->fm_rstc); - /* Wait for reset completion */ - count = 100; - do { - udelay(1); - } while (((ioread32be(&fman->fpm_regs->fm_rstc)) & - FPM_RSTC_FM_RESET) && --count); - if (count == 0) - return -EBUSY; - } + err = fman_reset(fman); + if (err) + return err; if (ioread32be(&fman->qmi_regs->fmqm_gs) & QMI_GS_HALT_NOT_BUSY) { resume(fman->fpm_regs); -- cgit v0.10.2 From 4cfc86f3dae6ca38ed49cdd78f458a03d4d87992 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Tue, 22 Mar 2016 14:56:57 -0400 Subject: ipv4: initialize flowi4_flags before calling fib_lookup() Field fl4.flowi4_flags is not initialized in fib_compute_spec_dst() before calling fib_lookup(), which means fib_table_lookup() is using non-deterministic data at this line: if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { Fix by initializing the entire fl4 structure, which will prevent similar issues as fields are added in the future by ensuring that all fields are initialized to zero unless explicitly initialized to another value. Fixes: 58189ca7b2741 ("net: Fix vti use case with oif in dst lookups") Suggested-by: David Ahern Signed-off-by: Lance Richardson Acked-by: David Ahern Signed-off-by: David S. Miller diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 21add55..8a9246d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -280,7 +280,6 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) struct in_device *in_dev; struct fib_result res; struct rtable *rt; - struct flowi4 fl4; struct net *net; int scope; @@ -296,14 +295,13 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) scope = RT_SCOPE_UNIVERSE; if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { - fl4.flowi4_oif = 0; - fl4.flowi4_iif = LOOPBACK_IFINDEX; - fl4.daddr = ip_hdr(skb)->saddr; - fl4.saddr = 0; - fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); - fl4.flowi4_scope = scope; - fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; - fl4.flowi4_tun_key.tun_id = 0; + struct flowi4 fl4 = { + .flowi4_iif = LOOPBACK_IFINDEX, + .daddr = ip_hdr(skb)->saddr, + .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .flowi4_scope = scope, + .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0, + }; if (!fib_lookup(net, &fl4, &res, 0)) return FIB_RES_PREFSRC(net, res); } else { -- cgit v0.10.2 From 9a0384c020b055a555500906be8ac314c92f3998 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 22 Mar 2016 22:27:38 +0300 Subject: macb: fix PHY reset The driver calls gpiod_set_value() with GPIOD_OUT_* instead of 0 and 1, as a result the PHY isn't really put back into reset state in macb_remove(). Moreover, the driver assumes that something else has set the GPIO direction to output, so if it has not, the PHY may not be taken out of reset in macb_probe() either... Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 3ce6095..6619178 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2959,7 +2959,7 @@ static int macb_probe(struct platform_device *pdev) int gpio = of_get_named_gpio(phy_node, "reset-gpios", 0); if (gpio_is_valid(gpio)) bp->reset_gpio = gpio_to_desc(gpio); - gpiod_set_value(bp->reset_gpio, GPIOD_OUT_HIGH); + gpiod_direction_output(bp->reset_gpio, 1); } of_node_put(phy_node); @@ -3029,7 +3029,7 @@ static int macb_remove(struct platform_device *pdev) mdiobus_free(bp->mii_bus); /* Shutdown the PHY if there is a GPIO reset */ - gpiod_set_value(bp->reset_gpio, GPIOD_OUT_LOW); + gpiod_set_value(bp->reset_gpio, 0); unregister_netdev(dev); clk_disable_unprepare(bp->tx_clk); -- cgit v0.10.2 From 6f57e56a1527d58264ae126eff94fdac067744fc Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 22 Mar 2016 17:05:51 +0100 Subject: Revert "vsock: Fix blocking ops call in prepare_to_wait" This reverts commit 5988818008257ca42010d6b43a3e0e48afec9898 ("vsock: Fix blocking ops call in prepare_to_wait") The commit reverted with this patch caused us to potentially miss wakeups. Since the condition is not checked between the prepare_to_wait and the schedule(), if a wakeup happens after the condition is checked but before the sleep happens, we will miss it. ( A description of the problem can be found here: http://www.makelinux.net/ldd3/chp-6-sect-2 ). By reverting the patch, the behaviour is still incorrect (since we shouldn't sleep between the prepare_to_wait and the schedule) but at least it will not miss wakeups. The next patch in the series actually fixes the behaviour. Signed-off-by: Claudio Imbrenda Signed-off-by: David S. Miller diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index bbe65dc..7fd1220 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1557,6 +1557,8 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, if (err < 0) goto out; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + while (total_written < len) { ssize_t written; @@ -1576,9 +1578,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, goto out_wait; release_sock(sk); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); timeout = schedule_timeout(timeout); - finish_wait(sk_sleep(sk), &wait); lock_sock(sk); if (signal_pending(current)) { err = sock_intr_errno(timeout); @@ -1588,6 +1588,8 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, goto out_wait; } + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); } /* These checks occur both as part of and after the loop @@ -1633,6 +1635,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, out_wait: if (total_written > 0) err = total_written; + finish_wait(sk_sleep(sk), &wait); out: release_sock(sk); return err; @@ -1713,6 +1716,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (err < 0) goto out; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); while (1) { s64 ready = vsock_stream_has_data(vsk); @@ -1723,7 +1727,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, */ err = -ENOMEM; - goto out; + goto out_wait; } else if (ready > 0) { ssize_t read; @@ -1746,7 +1750,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, vsk, target, read, !(flags & MSG_PEEK), &recv_data); if (err < 0) - goto out; + goto out_wait; if (read >= target || flags & MSG_PEEK) break; @@ -1769,9 +1773,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, break; release_sock(sk); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); timeout = schedule_timeout(timeout); - finish_wait(sk_sleep(sk), &wait); lock_sock(sk); if (signal_pending(current)) { @@ -1781,6 +1783,9 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, err = -EAGAIN; break; } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); } } @@ -1811,6 +1816,8 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, err = copied; } +out_wait: + finish_wait(sk_sleep(sk), &wait); out: release_sock(sk); return err; -- cgit v0.10.2 From f7f9b5e7f8eccfd68ffa7b8d74b07c478bb9e7f0 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 22 Mar 2016 17:05:52 +0100 Subject: AF_VSOCK: Shrink the area influenced by prepare_to_wait When a thread is prepared for waiting by calling prepare_to_wait, sleeping is not allowed until either the wait has taken place or finish_wait has been called. The existing code in af_vsock imposed unnecessary no-sleep assumptions to a broad list of backend functions. This patch shrinks the influence of prepare_to_wait to the area where it is strictly needed, therefore relaxing the no-sleep restriction there. Signed-off-by: Claudio Imbrenda Signed-off-by: David S. Miller diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 7fd1220..3dce53e 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1209,10 +1209,14 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (signal_pending(current)) { err = sock_intr_errno(timeout); - goto out_wait_error; + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; - goto out_wait_error; + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + goto out_wait; } prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); @@ -1220,20 +1224,17 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (sk->sk_err) { err = -sk->sk_err; - goto out_wait_error; - } else + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + } else { err = 0; + } out_wait: finish_wait(sk_sleep(sk), &wait); out: release_sock(sk); return err; - -out_wait_error: - sk->sk_state = SS_UNCONNECTED; - sock->state = SS_UNCONNECTED; - goto out_wait; } static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) @@ -1270,18 +1271,20 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) listener->sk_err == 0) { release_sock(listener); timeout = schedule_timeout(timeout); + finish_wait(sk_sleep(listener), &wait); lock_sock(listener); if (signal_pending(current)) { err = sock_intr_errno(timeout); - goto out_wait; + goto out; } else if (timeout == 0) { err = -EAGAIN; - goto out_wait; + goto out; } prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); } + finish_wait(sk_sleep(listener), &wait); if (listener->sk_err) err = -listener->sk_err; @@ -1301,19 +1304,15 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) */ if (err) { vconnected->rejected = true; - release_sock(connected); - sock_put(connected); - goto out_wait; + } else { + newsock->state = SS_CONNECTED; + sock_graft(connected, newsock); } - newsock->state = SS_CONNECTED; - sock_graft(connected, newsock); release_sock(connected); sock_put(connected); } -out_wait: - finish_wait(sk_sleep(listener), &wait); out: release_sock(listener); return err; @@ -1557,11 +1556,11 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, if (err < 0) goto out; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); while (total_written < len) { ssize_t written; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); while (vsock_stream_has_space(vsk) == 0 && sk->sk_err == 0 && !(sk->sk_shutdown & SEND_SHUTDOWN) && @@ -1570,27 +1569,33 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, /* Don't wait for non-blocking sockets. */ if (timeout == 0) { err = -EAGAIN; - goto out_wait; + finish_wait(sk_sleep(sk), &wait); + goto out_err; } err = transport->notify_send_pre_block(vsk, &send_data); - if (err < 0) - goto out_wait; + if (err < 0) { + finish_wait(sk_sleep(sk), &wait); + goto out_err; + } release_sock(sk); timeout = schedule_timeout(timeout); lock_sock(sk); if (signal_pending(current)) { err = sock_intr_errno(timeout); - goto out_wait; + finish_wait(sk_sleep(sk), &wait); + goto out_err; } else if (timeout == 0) { err = -EAGAIN; - goto out_wait; + finish_wait(sk_sleep(sk), &wait); + goto out_err; } prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } + finish_wait(sk_sleep(sk), &wait); /* These checks occur both as part of and after the loop * conditional since we need to check before and after @@ -1598,16 +1603,16 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, */ if (sk->sk_err) { err = -sk->sk_err; - goto out_wait; + goto out_err; } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || (vsk->peer_shutdown & RCV_SHUTDOWN)) { err = -EPIPE; - goto out_wait; + goto out_err; } err = transport->notify_send_pre_enqueue(vsk, &send_data); if (err < 0) - goto out_wait; + goto out_err; /* Note that enqueue will only write as many bytes as are free * in the produce queue, so we don't need to ensure len is @@ -1620,7 +1625,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, len - total_written); if (written < 0) { err = -ENOMEM; - goto out_wait; + goto out_err; } total_written += written; @@ -1628,14 +1633,13 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, err = transport->notify_send_post_enqueue( vsk, written, &send_data); if (err < 0) - goto out_wait; + goto out_err; } -out_wait: +out_err: if (total_written > 0) err = total_written; - finish_wait(sk_sleep(sk), &wait); out: release_sock(sk); return err; @@ -1716,21 +1720,61 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (err < 0) goto out; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); while (1) { - s64 ready = vsock_stream_has_data(vsk); + s64 ready; - if (ready < 0) { - /* Invalid queue pair content. XXX This should be - * changed to a connection reset in a later change. - */ + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + ready = vsock_stream_has_data(vsk); - err = -ENOMEM; - goto out_wait; - } else if (ready > 0) { + if (ready == 0) { + if (sk->sk_err != 0 || + (sk->sk_shutdown & RCV_SHUTDOWN) || + (vsk->peer_shutdown & SEND_SHUTDOWN)) { + finish_wait(sk_sleep(sk), &wait); + break; + } + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + finish_wait(sk_sleep(sk), &wait); + break; + } + + err = transport->notify_recv_pre_block( + vsk, target, &recv_data); + if (err < 0) { + finish_wait(sk_sleep(sk), &wait); + break; + } + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + finish_wait(sk_sleep(sk), &wait); + break; + } else if (timeout == 0) { + err = -EAGAIN; + finish_wait(sk_sleep(sk), &wait); + break; + } + } else { ssize_t read; + finish_wait(sk_sleep(sk), &wait); + + if (ready < 0) { + /* Invalid queue pair content. XXX This should + * be changed to a connection reset in a later + * change. + */ + + err = -ENOMEM; + goto out; + } + err = transport->notify_recv_pre_dequeue( vsk, target, &recv_data); if (err < 0) @@ -1750,42 +1794,12 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, vsk, target, read, !(flags & MSG_PEEK), &recv_data); if (err < 0) - goto out_wait; + goto out; if (read >= target || flags & MSG_PEEK) break; target -= read; - } else { - if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) - || (vsk->peer_shutdown & SEND_SHUTDOWN)) { - break; - } - /* Don't wait for non-blocking sockets. */ - if (timeout == 0) { - err = -EAGAIN; - break; - } - - err = transport->notify_recv_pre_block( - vsk, target, &recv_data); - if (err < 0) - break; - - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); - - if (signal_pending(current)) { - err = sock_intr_errno(timeout); - break; - } else if (timeout == 0) { - err = -EAGAIN; - break; - } - - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); } } @@ -1816,8 +1830,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, err = copied; } -out_wait: - finish_wait(sk_sleep(sk), &wait); out: release_sock(sk); return err; -- cgit v0.10.2 From d57019d1858a6f9b3ca05d76d793466ae428cfa3 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 23 Mar 2016 00:44:40 +0300 Subject: at803x: fix reset handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver of course "knows" that the chip's reset signal is active low, so it drives the GPIO to 0 to reset the PHY and to 1 otherwise; however all this will only work iff the GPIO is specified as active-high in the device tree! I think both the driver and the device trees (if there are any -- I was unable to find them) need to be fixed in this case... Fixes: 13a56b449325 ("net: phy: at803x: Add support for hardware reset") Signed-off-by: Sergei Shtylyov Acked-by: Uwe Kleine-König Signed-off-by: David S. Miller diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 1e901c7..d7f03fa 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -277,7 +277,7 @@ static int at803x_probe(struct phy_device *phydev) if (!priv) return -ENOMEM; - gpiod_reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); + gpiod_reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); if (IS_ERR(gpiod_reset)) return PTR_ERR(gpiod_reset); @@ -362,10 +362,10 @@ static void at803x_link_change_notify(struct phy_device *phydev) at803x_context_save(phydev, &context); - gpiod_set_value(priv->gpiod_reset, 0); - msleep(1); gpiod_set_value(priv->gpiod_reset, 1); msleep(1); + gpiod_set_value(priv->gpiod_reset, 0); + msleep(1); at803x_context_restore(phydev, &context); -- cgit v0.10.2 From 9eb13f65c305ec6687d7666abce14e6c047fa872 Mon Sep 17 00:00:00 2001 From: Sebastian Frias Date: Wed, 23 Mar 2016 11:49:09 +0100 Subject: net: phy: at803x: Request 'reset' GPIO only for AT8030 PHY This removes the dependency on GPIOLIB for non faulty PHYs. Indeed, without this patch, if GPIOLIB is not selected devm_gpiod_get_optional() will return -ENOSYS and the driver probe call will fail, regardless of the actual PHY hardware. Out of the 3 PHYs supported by this driver (AT8030, AT8031, AT8035), only AT8030 presents the issues that commit 13a56b449325 ("net: phy: at803x: Add support for hardware reset") attempts to work-around by using a 'reset' GPIO line. Hence, only AT8030 should depend on GPIOLIB operating properly. Fixes: 13a56b449325 ("net: phy: at803x: Add support for hardware reset") Signed-off-by: Sebastian Frias Signed-off-by: David S. Miller diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index d7f03fa..b3ffaee 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -277,12 +277,16 @@ static int at803x_probe(struct phy_device *phydev) if (!priv) return -ENOMEM; + if (phydev->drv->phy_id != ATH8030_PHY_ID) + goto does_not_require_reset_workaround; + gpiod_reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); if (IS_ERR(gpiod_reset)) return PTR_ERR(gpiod_reset); priv->gpiod_reset = gpiod_reset; +does_not_require_reset_workaround: phydev->priv = priv; return 0; -- cgit v0.10.2 From 621e49f6d80a731dd3073569571c4579352cfb1c Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Wed, 23 Mar 2016 01:06:04 +0200 Subject: net: mediatek: fix checking for NULL instead of IS_ERR() in .probe devm_ioremap_resource() returns ERR_PTR() value on error, it never returns NULL, fix it and propagate the returned error upwards. Fixes: 656e705243fd ("net-next: mediatek: add support for MT7623 ethernet") Signed-off-by: Vladimir Zapolskiy Reviewed-by: Matthias Brugger Acked-by: John Crispin Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 7f2126b..e0b68af 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1690,8 +1690,8 @@ static int mtk_probe(struct platform_device *pdev) return -ENOMEM; eth->base = devm_ioremap_resource(&pdev->dev, res); - if (!eth->base) - return -EADDRNOTAVAIL; + if (IS_ERR(eth->base)) + return PTR_ERR(eth->base); spin_lock_init(ð->page_lock); -- cgit v0.10.2 From 5197f3499c470ccc4b247db66ff883e597e3adda Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 22 Mar 2016 16:18:07 -0700 Subject: net: Reset encap_level to avoid resetting features on inner IP headers This patch corrects an oversight in which we were allowing the encap_level value to pass from the outer headers to the inner headers. As a result we were incorrectly identifying UDP or GRE tunnels as also making use of ipip or sit when the second header actually represented a tunnel encapsulated in either a UDP or GRE tunnel which already had the features masked. Fixes: 76443456227097179c1482 ("net: Move GSO csum into SKB_GSO_CB") Reported-by: Tom Herbert Signed-off-by: Alexander Duyck Acked-by: Tom Herbert Signed-off-by: David S. Miller diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index dd03161..c47539d 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -49,6 +49,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, /* setup inner skb. */ skb->encapsulation = 0; + SKB_GSO_CB(skb)->encap_level = 0; __skb_pull(skb, tnl_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 8007f73..0ed2daf 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -56,6 +56,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, /* setup inner skb. */ skb->encapsulation = 0; + SKB_GSO_CB(skb)->encap_level = 0; __skb_pull(skb, tnl_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); -- cgit v0.10.2 From 1f461dcdd296eecedaffffc6bae2bfa90bd7eb89 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 23 Mar 2016 16:38:55 +0100 Subject: ppp: take reference on channels netns Let channels hold a reference on their network namespace. Some channel types, like ppp_async and ppp_synctty, can have their userspace controller running in a different namespace. Therefore they can't rely on them to preclude their netns from being removed from under them. ================================================================== BUG: KASAN: use-after-free in ppp_unregister_channel+0x372/0x3a0 at addr ffff880064e217e0 Read of size 8 by task syz-executor/11581 ============================================================================= BUG net_namespace (Not tainted): kasan: bad access detected ----------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: Allocated in copy_net_ns+0x6b/0x1a0 age=92569 cpu=3 pid=6906 [< none >] ___slab_alloc+0x4c7/0x500 kernel/mm/slub.c:2440 [< none >] __slab_alloc+0x4c/0x90 kernel/mm/slub.c:2469 [< inline >] slab_alloc_node kernel/mm/slub.c:2532 [< inline >] slab_alloc kernel/mm/slub.c:2574 [< none >] kmem_cache_alloc+0x23a/0x2b0 kernel/mm/slub.c:2579 [< inline >] kmem_cache_zalloc kernel/include/linux/slab.h:597 [< inline >] net_alloc kernel/net/core/net_namespace.c:325 [< none >] copy_net_ns+0x6b/0x1a0 kernel/net/core/net_namespace.c:360 [< none >] create_new_namespaces+0x2f6/0x610 kernel/kernel/nsproxy.c:95 [< none >] copy_namespaces+0x297/0x320 kernel/kernel/nsproxy.c:150 [< none >] copy_process.part.35+0x1bf4/0x5760 kernel/kernel/fork.c:1451 [< inline >] copy_process kernel/kernel/fork.c:1274 [< none >] _do_fork+0x1bc/0xcb0 kernel/kernel/fork.c:1723 [< inline >] SYSC_clone kernel/kernel/fork.c:1832 [< none >] SyS_clone+0x37/0x50 kernel/kernel/fork.c:1826 [< none >] entry_SYSCALL_64_fastpath+0x16/0x7a kernel/arch/x86/entry/entry_64.S:185 INFO: Freed in net_drop_ns+0x67/0x80 age=575 cpu=2 pid=2631 [< none >] __slab_free+0x1fc/0x320 kernel/mm/slub.c:2650 [< inline >] slab_free kernel/mm/slub.c:2805 [< none >] kmem_cache_free+0x2a0/0x330 kernel/mm/slub.c:2814 [< inline >] net_free kernel/net/core/net_namespace.c:341 [< none >] net_drop_ns+0x67/0x80 kernel/net/core/net_namespace.c:348 [< none >] cleanup_net+0x4e5/0x600 kernel/net/core/net_namespace.c:448 [< none >] process_one_work+0x794/0x1440 kernel/kernel/workqueue.c:2036 [< none >] worker_thread+0xdb/0xfc0 kernel/kernel/workqueue.c:2170 [< none >] kthread+0x23f/0x2d0 kernel/drivers/block/aoe/aoecmd.c:1303 [< none >] ret_from_fork+0x3f/0x70 kernel/arch/x86/entry/entry_64.S:468 INFO: Slab 0xffffea0001938800 objects=3 used=0 fp=0xffff880064e20000 flags=0x5fffc0000004080 INFO: Object 0xffff880064e20000 @offset=0 fp=0xffff880064e24200 CPU: 1 PID: 11581 Comm: syz-executor Tainted: G B 4.4.0+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014 00000000ffffffff ffff8800662c7790 ffffffff8292049d ffff88003e36a300 ffff880064e20000 ffff880064e20000 ffff8800662c77c0 ffffffff816f2054 ffff88003e36a300 ffffea0001938800 ffff880064e20000 0000000000000000 Call Trace: [< inline >] __dump_stack kernel/lib/dump_stack.c:15 [] dump_stack+0x6f/0xa2 kernel/lib/dump_stack.c:50 [] print_trailer+0xf4/0x150 kernel/mm/slub.c:654 [] object_err+0x2f/0x40 kernel/mm/slub.c:661 [< inline >] print_address_description kernel/mm/kasan/report.c:138 [] kasan_report_error+0x215/0x530 kernel/mm/kasan/report.c:236 [< inline >] kasan_report kernel/mm/kasan/report.c:259 [] __asan_report_load8_noabort+0x3e/0x40 kernel/mm/kasan/report.c:280 [< inline >] ? ppp_pernet kernel/include/linux/compiler.h:218 [] ? ppp_unregister_channel+0x372/0x3a0 kernel/drivers/net/ppp/ppp_generic.c:2392 [< inline >] ppp_pernet kernel/include/linux/compiler.h:218 [] ppp_unregister_channel+0x372/0x3a0 kernel/drivers/net/ppp/ppp_generic.c:2392 [< inline >] ? ppp_pernet kernel/drivers/net/ppp/ppp_generic.c:293 [] ? ppp_unregister_channel+0xe6/0x3a0 kernel/drivers/net/ppp/ppp_generic.c:2392 [] ppp_asynctty_close+0xa3/0x130 kernel/drivers/net/ppp/ppp_async.c:241 [] ? async_lcp_peek+0x5b0/0x5b0 kernel/drivers/net/ppp/ppp_async.c:1000 [] tty_ldisc_close.isra.1+0x99/0xe0 kernel/drivers/tty/tty_ldisc.c:478 [] tty_ldisc_kill+0x40/0x170 kernel/drivers/tty/tty_ldisc.c:744 [] tty_ldisc_release+0x1b3/0x260 kernel/drivers/tty/tty_ldisc.c:772 [] tty_release+0xac1/0x13e0 kernel/drivers/tty/tty_io.c:1901 [] ? release_tty+0x320/0x320 kernel/drivers/tty/tty_io.c:1688 [] __fput+0x236/0x780 kernel/fs/file_table.c:208 [] ____fput+0x15/0x20 kernel/fs/file_table.c:244 [] task_work_run+0x16b/0x200 kernel/kernel/task_work.c:115 [< inline >] exit_task_work kernel/include/linux/task_work.h:21 [] do_exit+0x8b5/0x2c60 kernel/kernel/exit.c:750 [] ? debug_check_no_locks_freed+0x290/0x290 kernel/kernel/locking/lockdep.c:4123 [] ? mm_update_next_owner+0x6f0/0x6f0 kernel/kernel/exit.c:357 [] ? __dequeue_signal+0x136/0x470 kernel/kernel/signal.c:550 [] ? recalc_sigpending_tsk+0x13b/0x180 kernel/kernel/signal.c:145 [] do_group_exit+0x108/0x330 kernel/kernel/exit.c:880 [] get_signal+0x5e4/0x14f0 kernel/kernel/signal.c:2307 [< inline >] ? kretprobe_table_lock kernel/kernel/kprobes.c:1113 [] ? kprobe_flush_task+0xb5/0x450 kernel/kernel/kprobes.c:1158 [] do_signal+0x83/0x1c90 kernel/arch/x86/kernel/signal.c:712 [] ? recycle_rp_inst+0x310/0x310 kernel/include/linux/list.h:655 [] ? setup_sigcontext+0x780/0x780 kernel/arch/x86/kernel/signal.c:165 [] ? finish_task_switch+0x424/0x5f0 kernel/kernel/sched/core.c:2692 [< inline >] ? finish_lock_switch kernel/kernel/sched/sched.h:1099 [] ? finish_task_switch+0x120/0x5f0 kernel/kernel/sched/core.c:2678 [< inline >] ? context_switch kernel/kernel/sched/core.c:2807 [] ? __schedule+0x919/0x1bd0 kernel/kernel/sched/core.c:3283 [] exit_to_usermode_loop+0xf1/0x1a0 kernel/arch/x86/entry/common.c:247 [< inline >] prepare_exit_to_usermode kernel/arch/x86/entry/common.c:282 [] syscall_return_slowpath+0x19f/0x210 kernel/arch/x86/entry/common.c:344 [] int_ret_from_sys_call+0x25/0x9f kernel/arch/x86/entry/entry_64.S:281 Memory state around the buggy address: ffff880064e21680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff880064e21700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff880064e21780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff880064e21800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff880064e21880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: 273ec51dd7ce ("net: ppp_generic - introduce net-namespace functionality v2") Reported-by: Baozeng Ding Signed-off-by: Guillaume Nault Reviewed-by: Cyrill Gorcunov Signed-off-by: David S. Miller diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 4fd8610..f572b31 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -2307,7 +2307,7 @@ int ppp_register_net_channel(struct net *net, struct ppp_channel *chan) pch->ppp = NULL; pch->chan = chan; - pch->chan_net = net; + pch->chan_net = get_net(net); chan->ppp = pch; init_ppp_file(&pch->file, CHANNEL); pch->file.hdrlen = chan->hdrlen; @@ -2404,6 +2404,8 @@ ppp_unregister_channel(struct ppp_channel *chan) spin_lock_bh(&pn->all_channels_lock); list_del(&pch->list); spin_unlock_bh(&pn->all_channels_lock); + put_net(pch->chan_net); + pch->chan_net = NULL; pch->file.dead = 1; wake_up_interruptible(&pch->file.rwait); -- cgit v0.10.2 From d212b4633c3a99561939f2d423eacf3263850bcd Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 23 Mar 2016 09:43:09 -0700 Subject: hv_netvsc: Fix accessing freed memory in netvsc_change_mtu() struct netvsc_device is freed in rndis_filter_device_remove(). So we save the nvdev->num_chn into a temp variable for later usage. (Please also include this patch into stable branch.) Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 0860849..b8121eb 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -858,6 +858,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) struct netvsc_device *nvdev = hv_get_drvdata(hdev); struct netvsc_device_info device_info; int limit = ETH_DATA_LEN; + u32 num_chn; int ret = 0; if (nvdev == NULL || nvdev->destroy) @@ -873,6 +874,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) if (ret) goto out; + num_chn = nvdev->num_chn; + nvdev->start_remove = true; rndis_filter_device_remove(hdev); @@ -883,7 +886,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; - device_info.num_chn = nvdev->num_chn; + device_info.num_chn = num_chn; device_info.max_num_vrss_chns = max_num_vrss_chns; rndis_filter_device_add(hdev, &device_info); -- cgit v0.10.2 From 9efc2f7dcd06e04d7b6a3032ae65bfd628b1aebe Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 23 Mar 2016 09:43:10 -0700 Subject: hv_netvsc: Fix the array sizes to be max supported channels The VRSS_CHANNEL_MAX is the max number of channels supported by Hyper-V hosts. We use it for the related array sizes instead of using NR_CPUS, which may be set to several thousands. This patch reduces possible memory allocation failures. Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index b4c6878..8b3bd8e 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -619,6 +619,7 @@ struct nvsp_message { #define NETVSC_PACKET_SIZE 4096 #define VRSS_SEND_TAB_SIZE 16 +#define VRSS_CHANNEL_MAX 64 #define RNDIS_MAX_PKT_DEFAULT 8 #define RNDIS_PKT_ALIGN_DEFAULT 8 @@ -700,13 +701,13 @@ struct netvsc_device { struct net_device *ndev; - struct vmbus_channel *chn_table[NR_CPUS]; + struct vmbus_channel *chn_table[VRSS_CHANNEL_MAX]; u32 send_table[VRSS_SEND_TAB_SIZE]; u32 max_chn; u32 num_chn; spinlock_t sc_lock; /* Protects num_sc_offered variable */ u32 num_sc_offered; - atomic_t queue_sends[NR_CPUS]; + atomic_t queue_sends[VRSS_CHANNEL_MAX]; /* Holds rndis device info */ void *extension; @@ -718,7 +719,7 @@ struct netvsc_device { /* The sub channel callback buffer */ unsigned char *sub_cb_buf; - struct multi_send_data msd[NR_CPUS]; + struct multi_send_data msd[VRSS_CHANNEL_MAX]; u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ u32 pkt_align; /* alignment bytes, e.g. 8 */ diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 47d07c5..d5a54da 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1113,9 +1113,9 @@ int rndis_filter_device_add(struct hv_device *dev, if (ret || rsscap.num_recv_que < 2) goto out; - num_rss_qs = min(device_info->max_num_vrss_chns, rsscap.num_recv_que); + net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, rsscap.num_recv_que); - net_device->max_chn = rsscap.num_recv_que; + num_rss_qs = min(device_info->max_num_vrss_chns, net_device->max_chn); /* * We will limit the VRSS channels to the number CPUs in the NUMA node -- cgit v0.10.2 From 5e82b4b2a0015ed0a659b22ef2ee3409a3c39e54 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 23 Mar 2016 13:47:23 -0500 Subject: net: Fix typos and whitespace. Fix typos. Capitalize CPU, NAPI, RCU consistently. Align structure indentation. No functional change intended; only comment and whitespace changes. Signed-off-by: Bjorn Helgaas Signed-off-by: David S. Miller diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3f6385d..a675205 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -81,8 +81,8 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, * function. Real network devices commonly used with qdiscs should only return * the driver transmit return codes though - when qdiscs are used, the actual * transmission happens asynchronously, so the value is not propagated to - * higher layers. Virtual network devices transmit synchronously, in this case - * the driver transmit return codes are consumed by dev_queue_xmit(), all + * higher layers. Virtual network devices transmit synchronously; in this case + * the driver transmit return codes are consumed by dev_queue_xmit(), and all * others are propagated to higher layers. */ @@ -129,7 +129,7 @@ static inline bool dev_xmit_complete(int rc) } /* - * Compute the worst case header length according to the protocols + * Compute the worst-case header length according to the protocols * used. */ @@ -246,7 +246,7 @@ struct hh_cache { unsigned long hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)]; }; -/* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much. +/* Reserve HH_DATA_MOD byte-aligned hard_header_len, but at least that much. * Alternative is: * dev->hard_header_len ? (dev->hard_header_len + * (HH_DATA_MOD - 1)) & ~(HH_DATA_MOD - 1) : 0 @@ -272,7 +272,7 @@ struct header_ops { }; /* These flag bits are private to the generic network queueing - * layer, they may not be explicitly referenced by any other + * layer; they may not be explicitly referenced by any other * code. */ @@ -286,7 +286,7 @@ enum netdev_state_t { /* - * This structure holds at boot time configured netdevice settings. They + * This structure holds boot-time configured netdevice settings. They * are then used in the device probing. */ struct netdev_boot_setup { @@ -304,7 +304,7 @@ struct napi_struct { /* The poll_list must only be managed by the entity which * changes the state of the NAPI_STATE_SCHED bit. This means * whoever atomically sets that bit can add this napi_struct - * to the per-cpu poll_list, and whoever clears that bit + * to the per-CPU poll_list, and whoever clears that bit * can remove from the list right before clearing the bit. */ struct list_head poll_list; @@ -350,7 +350,7 @@ typedef enum gro_result gro_result_t; * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in * case skb->dev was changed by rx_handler. * @RX_HANDLER_EXACT: Force exact delivery, no wildcard. - * @RX_HANDLER_PASS: Do nothing, passe the skb as if no rx_handler was called. + * @RX_HANDLER_PASS: Do nothing, pass the skb as if no rx_handler was called. * * rx_handlers are functions called from inside __netif_receive_skb(), to do * special processing of the skb, prior to delivery to protocol handlers. @@ -365,19 +365,19 @@ typedef enum gro_result gro_result_t; * Upon return, rx_handler is expected to tell __netif_receive_skb() what to * do with the skb. * - * If the rx_handler consumed to skb in some way, it should return + * If the rx_handler consumed the skb in some way, it should return * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for - * the skb to be delivered in some other ways. + * the skb to be delivered in some other way. * * If the rx_handler changed skb->dev, to divert the skb to another * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the * new device will be called if it exists. * - * If the rx_handler consider the skb should be ignored, it should return + * If the rx_handler decides the skb should be ignored, it should return * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that * are registered on exact device (ptype->dev == skb->dev). * - * If the rx_handler didn't changed skb->dev, but want the skb to be normally + * If the rx_handler didn't change skb->dev, but wants the skb to be normally * delivered, it should return RX_HANDLER_PASS. * * A device without a registered rx_handler will behave as if rx_handler @@ -402,11 +402,11 @@ static inline bool napi_disable_pending(struct napi_struct *n) } /** - * napi_schedule_prep - check if napi can be scheduled - * @n: napi context + * napi_schedule_prep - check if NAPI can be scheduled + * @n: NAPI context * * Test if NAPI routine is already running, and if not mark - * it as running. This is used as a condition variable + * it as running. This is used as a condition variable to * insure only one NAPI poll instance runs. We also make * sure there is no pending NAPI disable. */ @@ -418,7 +418,7 @@ static inline bool napi_schedule_prep(struct napi_struct *n) /** * napi_schedule - schedule NAPI poll - * @n: napi context + * @n: NAPI context * * Schedule NAPI poll routine to be called if it is not already * running. @@ -431,7 +431,7 @@ static inline void napi_schedule(struct napi_struct *n) /** * napi_schedule_irqoff - schedule NAPI poll - * @n: napi context + * @n: NAPI context * * Variant of napi_schedule(), assuming hard irqs are masked. */ @@ -455,7 +455,7 @@ void __napi_complete(struct napi_struct *n); void napi_complete_done(struct napi_struct *n, int work_done); /** * napi_complete - NAPI processing complete - * @n: napi context + * @n: NAPI context * * Mark NAPI processing as complete. * Consider using napi_complete_done() instead. @@ -467,32 +467,32 @@ static inline void napi_complete(struct napi_struct *n) /** * napi_hash_add - add a NAPI to global hashtable - * @napi: napi context + * @napi: NAPI context * - * generate a new napi_id and store a @napi under it in napi_hash - * Used for busy polling (CONFIG_NET_RX_BUSY_POLL) + * Generate a new napi_id and store a @napi under it in napi_hash. + * Used for busy polling (CONFIG_NET_RX_BUSY_POLL). * Note: This is normally automatically done from netif_napi_add(), - * so might disappear in a future linux version. + * so might disappear in a future Linux version. */ void napi_hash_add(struct napi_struct *napi); /** * napi_hash_del - remove a NAPI from global table - * @napi: napi context + * @napi: NAPI context * - * Warning: caller must observe rcu grace period + * Warning: caller must observe RCU grace period * before freeing memory containing @napi, if * this function returns true. * Note: core networking stack automatically calls it - * from netif_napi_del() + * from netif_napi_del(). * Drivers might want to call this helper to combine all - * the needed rcu grace periods into a single one. + * the needed RCU grace periods into a single one. */ bool napi_hash_del(struct napi_struct *napi); /** * napi_disable - prevent NAPI from scheduling - * @n: napi context + * @n: NAPI context * * Stop NAPI from being scheduled on this context. * Waits till any outstanding processing completes. @@ -501,7 +501,7 @@ void napi_disable(struct napi_struct *n); /** * napi_enable - enable NAPI scheduling - * @n: napi context + * @n: NAPI context * * Resume NAPI from being scheduled on this context. * Must be paired with napi_disable. @@ -516,7 +516,7 @@ static inline void napi_enable(struct napi_struct *n) /** * napi_synchronize - wait until NAPI is not running - * @n: napi context + * @n: NAPI context * * Wait until NAPI is done being scheduled on this context. * Waits till any outstanding processing completes but @@ -559,7 +559,7 @@ enum netdev_queue_state_t { struct netdev_queue { /* - * read mostly part + * read-mostly part */ struct net_device *dev; struct Qdisc __rcu *qdisc; @@ -571,7 +571,7 @@ struct netdev_queue { int numa_node; #endif /* - * write mostly part + * write-mostly part */ spinlock_t _xmit_lock ____cacheline_aligned_in_smp; int xmit_lock_owner; @@ -648,11 +648,11 @@ struct rps_dev_flow_table { /* * The rps_sock_flow_table contains mappings of flows to the last CPU * on which they were processed by the application (set in recvmsg). - * Each entry is a 32bit value. Upper part is the high order bits - * of flow hash, lower part is cpu number. + * Each entry is a 32bit value. Upper part is the high-order bits + * of flow hash, lower part is CPU number. * rps_cpu_mask is used to partition the space, depending on number of - * possible cpus : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 - * For example, if 64 cpus are possible, rps_cpu_mask = 0x3f, + * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 + * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, * meaning we use 32-6=26 bits for the hash. */ struct rps_sock_flow_table { @@ -674,7 +674,7 @@ static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, unsigned int index = hash & table->mask; u32 val = hash & ~rps_cpu_mask; - /* We only give a hint, preemption can change cpu under us */ + /* We only give a hint, preemption can change CPU under us */ val |= raw_smp_processor_id(); if (table->ents[index] != val) @@ -807,21 +807,21 @@ struct tc_to_netdev { * optional and can be filled with a null pointer. * * int (*ndo_init)(struct net_device *dev); - * This function is called once when network device is registered. - * The network device can use this to any late stage initializaton - * or semantic validattion. It can fail with an error code which will - * be propogated back to register_netdev + * This function is called once when a network device is registered. + * The network device can use this for any late stage initialization + * or semantic validation. It can fail with an error code which will + * be propagated back to register_netdev. * * void (*ndo_uninit)(struct net_device *dev); * This function is called when device is unregistered or when registration * fails. It is not called if init fails. * * int (*ndo_open)(struct net_device *dev); - * This function is called when network device transistions to the up + * This function is called when a network device transitions to the up * state. * * int (*ndo_stop)(struct net_device *dev); - * This function is called when network device transistions to the down + * This function is called when a network device transitions to the down * state. * * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, @@ -832,7 +832,7 @@ struct tc_to_netdev { * corner cases, but the stack really does a non-trivial amount * of useless work if you return NETDEV_TX_BUSY. * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) - * Required can not be NULL. + * Required; cannot be NULL. * * netdev_features_t (*ndo_fix_features)(struct net_device *dev, * netdev_features_t features); @@ -842,34 +842,34 @@ struct tc_to_netdev { * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * void *accel_priv, select_queue_fallback_t fallback); - * Called to decide which queue to when device supports multiple + * Called to decide which queue to use when device supports multiple * transmit queues. * * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); * This function is called to allow device receiver to make - * changes to configuration when multicast or promiscious is enabled. + * changes to configuration when multicast or promiscuous is enabled. * * void (*ndo_set_rx_mode)(struct net_device *dev); * This function is called device changes address list filtering. * If driver handles unicast address filtering, it should set - * IFF_UNICAST_FLT to its priv_flags. + * IFF_UNICAST_FLT in its priv_flags. * * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); * This function is called when the Media Access Control address * needs to be changed. If this interface is not defined, the - * mac address can not be changed. + * MAC address can not be changed. * * int (*ndo_validate_addr)(struct net_device *dev); * Test if Media Access Control address is valid for the device. * * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); - * Called when a user request an ioctl which can't be handled by - * the generic interface code. If not defined ioctl's return + * Called when a user requests an ioctl which can't be handled by + * the generic interface code. If not defined ioctls return * not supported error code. * * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); * Used to set network devices bus interface parameters. This interface - * is retained for legacy reason, new devices should use the bus + * is retained for legacy reasons; new devices should use the bus * interface (PCI) for low level management. * * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); @@ -878,7 +878,7 @@ struct tc_to_netdev { * will return an error. * * void (*ndo_tx_timeout)(struct net_device *dev); - * Callback uses when the transmitter has not made any progress + * Callback used when the transmitter has not made any progress * for dev->watchdog ticks. * * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, @@ -896,11 +896,11 @@ struct tc_to_netdev { * neither operation. * * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid); - * If device support VLAN filtering this function is called when a + * If device supports VLAN filtering this function is called when a * VLAN id is registered. * * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, __be16 proto, u16 vid); - * If device support VLAN filtering this function is called when a + * If device supports VLAN filtering this function is called when a * VLAN id is unregistered. * * void (*ndo_poll_controller)(struct net_device *dev); @@ -920,7 +920,7 @@ struct tc_to_netdev { * * Enable or disable the VF ability to query its RSS Redirection Table and * Hash Key. This is needed since on some devices VF share this information - * with PF and querying it may adduce a theoretical security risk. + * with PF and querying it may introduce a theoretical security risk. * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); * int (*ndo_setup_tc)(struct net_device *dev, u8 tc) @@ -1030,20 +1030,20 @@ struct tc_to_netdev { * * void (*ndo_add_vxlan_port)(struct net_device *dev, * sa_family_t sa_family, __be16 port); - * Called by vxlan to notiy a driver about the UDP port and socket - * address family that vxlan is listnening to. It is called only when + * Called by vxlan to notify a driver about the UDP port and socket + * address family that vxlan is listening to. It is called only when * a new port starts listening. The operation is protected by the * vxlan_net->sock_lock. * * void (*ndo_add_geneve_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); + * sa_family_t sa_family, __be16 port); * Called by geneve to notify a driver about the UDP port and socket * address family that geneve is listnening to. It is called only when * a new port starts listening. The operation is protected by the * geneve_net->sock_lock. * * void (*ndo_del_geneve_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); + * sa_family_t sa_family, __be16 port); * Called by geneve to notify the driver about a UDP port and socket * address family that geneve is not listening to anymore. The operation * is protected by the geneve_net->sock_lock. @@ -1072,9 +1072,9 @@ struct tc_to_netdev { * Callback to use for xmit over the accelerated station. This * is used in place of ndo_start_xmit on accelerated net * devices. - * netdev_features_t (*ndo_features_check) (struct sk_buff *skb, - * struct net_device *dev - * netdev_features_t features); + * netdev_features_t (*ndo_features_check)(struct sk_buff *skb, + * struct net_device *dev + * netdev_features_t features); * Called by core transmit path to determine if device is capable of * performing offload operations on a given packet. This is to give * the device an opportunity to implement any restrictions that cannot @@ -1088,7 +1088,7 @@ struct tc_to_netdev { * int (*ndo_get_iflink)(const struct net_device *dev); * Called to get the iflink value of this device. * void (*ndo_change_proto_down)(struct net_device *dev, - * bool proto_down); + * bool proto_down); * This function is used to pass protocol port error state information * to the switch driver. The switch driver can react to the proto_down * by doing a phys down on the associated switch port. @@ -1100,7 +1100,7 @@ struct tc_to_netdev { * This function is used to specify the headroom that the skb must * consider when allocation skb during packet reception. Setting * appropriate rx headroom value allows avoiding skb head copy on - * forward. Setting a negative value reset the rx headroom to the + * forward. Setting a negative value resets the rx headroom to the * default value. * */ @@ -1296,7 +1296,7 @@ struct net_device_ops { * * These are the &struct net_device, they are only set internally * by drivers and used in the kernel. These flags are invisible to - * userspace, this means that the order of these flags can change + * userspace; this means that the order of these flags can change * during any kernel release. * * You should have a pretty good reason to be extending these flags. @@ -1414,10 +1414,10 @@ enum netdev_priv_flags { * * @state: Generic network queuing layer state, see netdev_state_t * @dev_list: The global list of network devices - * @napi_list: List entry, that is used for polling napi devices - * @unreg_list: List entry, that is used, when we are unregistering the - * device, see the function unregister_netdev - * @close_list: List entry, that is used, when we are closing the device + * @napi_list: List entry used for polling NAPI devices + * @unreg_list: List entry when we are unregistering the + * device; see the function unregister_netdev + * @close_list: List entry used when we are closing the device * @ptype_all: Device-specific packet handlers for all protocols * @ptype_specific: Device-specific, protocol-specific packet handlers * @@ -1437,7 +1437,7 @@ enum netdev_priv_flags { * @mpls_features: Mask of features inheritable by MPLS * * @ifindex: interface index - * @group: The group, that the device belongs to + * @group: The group the device belongs to * * @stats: Statistics struct, which was left as a legacy, use * rtnl_link_stats64 instead @@ -1491,7 +1491,7 @@ enum netdev_priv_flags { * @dev_port: Used to differentiate devices that share * the same function * @addr_list_lock: XXX: need comments on this one - * @uc_promisc: Counter, that indicates, that promiscuous mode + * @uc_promisc: Counter that indicates promiscuous mode * has been enabled due to the need to listen to * additional unicast addresses in a device that * does not implement ndo_set_rx_mode() @@ -1499,9 +1499,9 @@ enum netdev_priv_flags { * @mc: multicast mac addresses * @dev_addrs: list of device hw addresses * @queues_kset: Group of all Kobjects in the Tx and RX queues - * @promiscuity: Number of times, the NIC is told to work in - * Promiscuous mode, if it becomes 0 the NIC will - * exit from working in Promiscuous mode + * @promiscuity: Number of times the NIC is told to work in + * promiscuous mode; if it becomes 0 the NIC will + * exit promiscuous mode * @allmulti: Counter, enables or disables allmulticast mode * * @vlan_info: VLAN info @@ -1547,7 +1547,7 @@ enum netdev_priv_flags { * * @trans_start: Time (in jiffies) of last Tx * @watchdog_timeo: Represents the timeout that is used by - * the watchdog ( see dev_watchdog() ) + * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers * * @pcpu_refcnt: Number of references to this device @@ -1664,8 +1664,8 @@ struct net_device { atomic_long_t rx_nohandler; #ifdef CONFIG_WIRELESS_EXT - const struct iw_handler_def * wireless_handlers; - struct iw_public_data * wireless_data; + const struct iw_handler_def *wireless_handlers; + struct iw_public_data *wireless_data; #endif const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; @@ -1718,7 +1718,7 @@ struct net_device { unsigned int allmulti; - /* Protocol specific pointers */ + /* Protocol-specific pointers */ #if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_info __rcu *vlan_info; @@ -1748,13 +1748,11 @@ struct net_device { /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; - #ifdef CONFIG_SYSFS struct netdev_rx_queue *_rx; unsigned int num_rx_queues; unsigned int real_num_rx_queues; - #endif unsigned long gro_flush_timeout; @@ -1846,7 +1844,7 @@ struct net_device { struct garp_port __rcu *garp_port; struct mrp_port __rcu *mrp_port; - struct device dev; + struct device dev; const struct attribute_group *sysfs_groups[4]; const struct attribute_group *sysfs_rx_queue_group; @@ -1861,9 +1859,9 @@ struct net_device { #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; #endif - u8 num_tc; - struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; - u8 prio_tc_map[TC_BITMASK + 1]; + u8 num_tc; + struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; + u8 prio_tc_map[TC_BITMASK + 1]; #if IS_ENABLED(CONFIG_FCOE) unsigned int fcoe_ddp_xid; @@ -1871,9 +1869,9 @@ struct net_device { #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif - struct phy_device *phydev; - struct lock_class_key *qdisc_tx_busylock; - bool proto_down; + struct phy_device *phydev; + struct lock_class_key *qdisc_tx_busylock; + bool proto_down; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -2021,7 +2019,7 @@ static inline void *netdev_priv(const struct net_device *dev) /* Set the sysfs device type for the network logical device to allow * fine-grained identification of different network device types. For - * example Ethernet, Wirelss LAN, Bluetooth, WiMAX etc. + * example Ethernet, Wireless LAN, Bluetooth, WiMAX etc. */ #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype)) @@ -2031,22 +2029,22 @@ static inline void *netdev_priv(const struct net_device *dev) #define NAPI_POLL_WEIGHT 64 /** - * netif_napi_add - initialize a napi context + * netif_napi_add - initialize a NAPI context * @dev: network device - * @napi: napi context + * @napi: NAPI context * @poll: polling function * @weight: default weight * - * netif_napi_add() must be used to initialize a napi context prior to calling - * *any* of the other napi related functions. + * netif_napi_add() must be used to initialize a NAPI context prior to calling + * *any* of the other NAPI-related functions. */ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight); /** - * netif_tx_napi_add - initialize a napi context + * netif_tx_napi_add - initialize a NAPI context * @dev: network device - * @napi: napi context + * @napi: NAPI context * @poll: polling function * @weight: default weight * @@ -2064,22 +2062,22 @@ static inline void netif_tx_napi_add(struct net_device *dev, } /** - * netif_napi_del - remove a napi context - * @napi: napi context + * netif_napi_del - remove a NAPI context + * @napi: NAPI context * - * netif_napi_del() removes a napi context from the network device napi list + * netif_napi_del() removes a NAPI context from the network device NAPI list */ void netif_napi_del(struct napi_struct *napi); struct napi_gro_cb { /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ - void *frag0; + void *frag0; /* Length of frag0. */ unsigned int frag0_len; /* This indicates where we are processing relative to skb->data. */ - int data_offset; + int data_offset; /* This is non-zero if the packet cannot be merged with the new skb. */ u16 flush; @@ -2175,7 +2173,7 @@ struct udp_offload { struct udp_offload_callbacks callbacks; }; -/* often modified stats are per cpu, other are shared (netdev->stats) */ +/* often modified stats are per-CPU, other are shared (netdev->stats) */ struct pcpu_sw_netstats { u64 rx_packets; u64 rx_bytes; @@ -2272,7 +2270,7 @@ struct netdev_notifier_changeupper_info { struct netdev_notifier_info info; /* must be first */ struct net_device *upper_dev; /* new upper dev */ bool master; /* is upper dev master */ - bool linking; /* is the nofication for link or unlink */ + bool linking; /* is the notification for link or unlink */ void *upper_info; /* upper dev info */ }; @@ -2737,7 +2735,7 @@ extern int netdev_flow_limit_table_len; #endif /* CONFIG_NET_FLOW_LIMIT */ /* - * Incoming packets are placed on per-cpu queues + * Incoming packets are placed on per-CPU queues */ struct softnet_data { struct list_head poll_list; @@ -2907,7 +2905,7 @@ netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue) * @dev_queue: pointer to transmit queue * * BQL enabled drivers might use this helper in their ndo_start_xmit(), - * to give appropriate hint to the cpu. + * to give appropriate hint to the CPU. */ static inline void netdev_txq_bql_enqueue_prefetchw(struct netdev_queue *dev_queue) { @@ -2921,7 +2919,7 @@ static inline void netdev_txq_bql_enqueue_prefetchw(struct netdev_queue *dev_que * @dev_queue: pointer to transmit queue * * BQL enabled drivers might use this helper in their TX completion path, - * to give appropriate hint to the cpu. + * to give appropriate hint to the CPU. */ static inline void netdev_txq_bql_complete_prefetchw(struct netdev_queue *dev_queue) { @@ -3060,7 +3058,7 @@ static inline bool netif_running(const struct net_device *dev) } /* - * Routines to manage the subqueues on a device. We only need start + * Routines to manage the subqueues on a device. We only need start, * stop, and a check if it's stopped. All other device management is * done at the overall netdevice level. * Also test the device if we're multiqueue. @@ -3344,7 +3342,6 @@ void netif_carrier_off(struct net_device *dev); * in a "pending" state, waiting for some external event. For "on- * demand" interfaces, this new state identifies the situation where the * interface is waiting for events to place it in the up state. - * */ static inline void netif_dormant_on(struct net_device *dev) { @@ -3679,7 +3676,7 @@ void dev_uc_init(struct net_device *dev); * * Add newly added addresses to the interface, and release * addresses that have been deleted. - **/ + */ static inline int __dev_uc_sync(struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), @@ -3695,7 +3692,7 @@ static inline int __dev_uc_sync(struct net_device *dev, * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by dev_uc_sync(). - **/ + */ static inline void __dev_uc_unsync(struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) @@ -3723,7 +3720,7 @@ void dev_mc_init(struct net_device *dev); * * Add newly added addresses to the interface, and release * addresses that have been deleted. - **/ + */ static inline int __dev_mc_sync(struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), @@ -3739,7 +3736,7 @@ static inline int __dev_mc_sync(struct net_device *dev, * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by dev_mc_sync(). - **/ + */ static inline void __dev_mc_unsync(struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) -- cgit v0.10.2 From 3f735131d9c2523eb54a6c5099fa8c60a4292d48 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 23 Mar 2016 14:54:48 -0700 Subject: hv_netvsc: Fix the order of num_sc_offered decrement Reorder the code in netvsc_sc_open(), so num_sc_offered is only decremented after vmbus_open() is called. This avoid pontential race of removing device before all channels are setup. Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index d5a54da..c4e1e04 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -986,12 +986,6 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj); - spin_lock_irqsave(&nvscdev->sc_lock, flags); - nvscdev->num_sc_offered--; - spin_unlock_irqrestore(&nvscdev->sc_lock, flags); - if (nvscdev->num_sc_offered == 0) - complete(&nvscdev->channel_init_wait); - if (chn_index >= nvscdev->num_chn) return; @@ -1004,6 +998,12 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) if (ret == 0) nvscdev->chn_table[chn_index] = new_sc; + + spin_lock_irqsave(&nvscdev->sc_lock, flags); + nvscdev->num_sc_offered--; + spin_unlock_irqrestore(&nvscdev->sc_lock, flags); + if (nvscdev->num_sc_offered == 0) + complete(&nvscdev->channel_init_wait); } int rndis_filter_device_add(struct hv_device *dev, -- cgit v0.10.2 From 6579a023a881e0592ce9a98fdfcbcc0a2a096aa7 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 23 Mar 2016 17:59:51 +0800 Subject: net: ping: make ping_v6_sendmsg static As ping_v6_sendmsg is used only in this file, making it static The body of "pingv6_prot" and "pingv6_protosw" were moved at the middle of the file, to avoid having to declare some static prototypes. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller diff --git a/include/net/ping.h b/include/net/ping.h index 5fd7cc2..4cd90d6 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -79,7 +79,6 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, void *user_icmph, size_t icmph_len); -int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); bool ping_rcv(struct sk_buff *skb); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 263a516..c382db7 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -26,35 +26,6 @@ #include #include -struct proto pingv6_prot = { - .name = "PINGv6", - .owner = THIS_MODULE, - .init = ping_init_sock, - .close = ping_close, - .connect = ip6_datagram_connect_v6_only, - .disconnect = udp_disconnect, - .setsockopt = ipv6_setsockopt, - .getsockopt = ipv6_getsockopt, - .sendmsg = ping_v6_sendmsg, - .recvmsg = ping_recvmsg, - .bind = ping_bind, - .backlog_rcv = ping_queue_rcv_skb, - .hash = ping_hash, - .unhash = ping_unhash, - .get_port = ping_get_port, - .obj_size = sizeof(struct raw6_sock), -}; -EXPORT_SYMBOL_GPL(pingv6_prot); - -static struct inet_protosw pingv6_protosw = { - .type = SOCK_DGRAM, - .protocol = IPPROTO_ICMPV6, - .prot = &pingv6_prot, - .ops = &inet6_dgram_ops, - .flags = INET_PROTOSW_REUSE, -}; - - /* Compatibility glue so we can support IPv6 when it's compiled as a module */ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) @@ -77,7 +48,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, return 0; } -int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) +static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -192,6 +163,34 @@ int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return len; } +struct proto pingv6_prot = { + .name = "PINGv6", + .owner = THIS_MODULE, + .init = ping_init_sock, + .close = ping_close, + .connect = ip6_datagram_connect_v6_only, + .disconnect = udp_disconnect, + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, + .sendmsg = ping_v6_sendmsg, + .recvmsg = ping_recvmsg, + .bind = ping_bind, + .backlog_rcv = ping_queue_rcv_skb, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, + .obj_size = sizeof(struct raw6_sock), +}; +EXPORT_SYMBOL_GPL(pingv6_prot); + +static struct inet_protosw pingv6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + .prot = &pingv6_prot, + .ops = &inet6_dgram_ops, + .flags = INET_PROTOSW_REUSE, +}; + #ifdef CONFIG_PROC_FS static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos) { -- cgit v0.10.2