From 8c427f0626fe289f36ce65a60dcd602f6ef72777 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Wed, 7 Aug 2013 10:52:34 -0500 Subject: sata, highbank: fix ordering of SGPIO signals The ACTIVITY and ERROR signals were reversed in the original commit. Fix that so that hard drive activity does not show up on the error light, and attempts to indicate that the hard drive is failing do not show up as hard drive activity. This fixes a fairly serious functional bug in the driver, but failing to apply this patch will not cause any stability issues on the system. Signed-off-by: Mark Langsdorf Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c index d047d92..e9a4f46 100644 --- a/drivers/ata/sata_highbank.c +++ b/drivers/ata/sata_highbank.c @@ -86,11 +86,11 @@ struct ecx_plat_data { #define SGPIO_SIGNALS 3 #define ECX_ACTIVITY_BITS 0x300000 -#define ECX_ACTIVITY_SHIFT 2 +#define ECX_ACTIVITY_SHIFT 0 #define ECX_LOCATE_BITS 0x80000 #define ECX_LOCATE_SHIFT 1 #define ECX_FAULT_BITS 0x400000 -#define ECX_FAULT_SHIFT 0 +#define ECX_FAULT_SHIFT 2 static inline int sgpio_bit_shift(struct ecx_plat_data *pdata, u32 port, u32 shift) { -- cgit v0.10.2 From 628e341f319f1a64a4639088faba952e4ec8f0a8 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 14 Aug 2013 13:05:23 +0200 Subject: xfrm: make local error reporting more robust In xfrm4 and xfrm6 we need to take care about sockets of the other address family. This could happen because a 6in4 or 4in6 tunnel could get protected by ipsec. Because we don't want to have a run-time dependency on ipv6 when only using ipv4 xfrm we have to embed a pointer to the correct local_error function in xfrm_state_afinet and look it up when returning an error depending on the socket address family. 
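In sketch form, the dispatch described above looks like this (simplified; the exact hook wiring and the afinfo get/put helpers are in the diff below):

void xfrm_local_error(struct sk_buff *skb, int mtu)
{
	struct xfrm_state_afinfo *afinfo;

	/* look up the handler that matches the originating socket's family */
	afinfo = xfrm_state_get_afinfo(skb->sk->sk_family);
	if (!afinfo)
		return;

	/* resolves to xfrm4_local_error() or xfrm6_local_error() */
	afinfo->local_error(skb, mtu);
	xfrm_state_put_afinfo(afinfo);
}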
Thanks to vi0ss for the great bug report: v2: a) fix two more unsafe interpretations of skb->sk as ipv6 socket (xfrm6_local_dontfrag and __xfrm6_output) v3: a) add an EXPORT_SYMBOL_GPL(xfrm_local_error) to fix a link error when building ipv6 as a module (thanks to Steffen Klassert) Reported-by: Cc: Steffen Klassert Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 94ce082..e823786 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -341,10 +341,13 @@ struct xfrm_state_afinfo { struct sk_buff *skb); int (*transport_finish)(struct sk_buff *skb, int async); + void (*local_error)(struct sk_buff *skb, u32 mtu); }; extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); extern int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo); +extern struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); +extern void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); extern void xfrm_state_delete_tunnel(struct xfrm_state *x); @@ -1477,6 +1480,7 @@ extern int xfrm_input_resume(struct sk_buff *skb, int nexthdr); extern int xfrm_output_resume(struct sk_buff *skb, int err); extern int xfrm_output(struct sk_buff *skb); extern int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); +extern void xfrm_local_error(struct sk_buff *skb, int mtu); extern int xfrm4_extract_header(struct sk_buff *skb); extern int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, @@ -1497,6 +1501,7 @@ extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short fam extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel *handler); extern int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel *handler); +extern void xfrm4_local_error(struct sk_buff *skb, u32 mtu); extern int xfrm6_extract_header(struct sk_buff *skb); extern int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi); @@ -1514,6 +1519,7 @@ extern int xfrm6_output(struct sk_buff *skb); extern int xfrm6_output_finish(struct sk_buff *skb); extern int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); +extern void xfrm6_local_error(struct sk_buff *skb, u32 mtu); #ifdef CONFIG_XFRM extern int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 327a617..7a5491f 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -33,8 +33,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) mtu = dst_mtu(dst); if (skb->len > mtu) { if (skb->sk) - ip_local_error(skb->sk, EMSGSIZE, ip_hdr(skb)->daddr, - inet_sk(skb->sk)->inet_dport, mtu); + xfrm_local_error(skb, mtu); else icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); @@ -99,3 +98,12 @@ int xfrm4_output(struct sk_buff *skb) x->outer_mode->afinfo->output_finish, !(IPCB(skb)->flags & IPSKB_REROUTED)); } + +void xfrm4_local_error(struct sk_buff *skb, u32 mtu) +{ + struct iphdr *hdr; + + hdr = skb->encapsulation ? 
inner_ip_hdr(skb) : ip_hdr(skb); + ip_local_error(skb->sk, EMSGSIZE, hdr->daddr, + inet_sk(skb->sk)->inet_dport, mtu); +} diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 9258e75..0b2a064 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -83,6 +83,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .extract_input = xfrm4_extract_input, .extract_output = xfrm4_extract_output, .transport_finish = xfrm4_transport_finish, + .local_error = xfrm4_local_error, }; void __init xfrm4_state_init(void) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 8755a30..b64fff3 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -34,8 +34,10 @@ static int xfrm6_local_dontfrag(struct sk_buff *skb) struct sock *sk = skb->sk; if (sk) { - proto = sk->sk_protocol; + if (sk->sk_family != AF_INET6) + return 0; + proto = sk->sk_protocol; if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) return inet6_sk(sk)->dontfrag; } @@ -54,7 +56,7 @@ static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) ipv6_local_rxpmtu(sk, &fl6, mtu); } -static void xfrm6_local_error(struct sk_buff *skb, u32 mtu) +void xfrm6_local_error(struct sk_buff *skb, u32 mtu) { struct flowi6 fl6; struct sock *sk = skb->sk; @@ -80,7 +82,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if (xfrm6_local_dontfrag(skb)) xfrm6_local_rxpmtu(skb, mtu); else if (skb->sk) - xfrm6_local_error(skb, mtu); + xfrm_local_error(skb, mtu); else icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ret = -EMSGSIZE; @@ -142,7 +144,7 @@ static int __xfrm6_output(struct sk_buff *skb) xfrm6_local_rxpmtu(skb, mtu); return -EMSGSIZE; } else if (!skb->local_df && skb->len > mtu && skb->sk) { - xfrm6_local_error(skb, mtu); + xfrm_local_error(skb, mtu); return -EMSGSIZE; } diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index d8c70b8..3fc9701 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -183,6 +183,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .extract_input = xfrm6_extract_input, .extract_output = xfrm6_extract_output, .transport_finish = xfrm6_transport_finish, + .local_error = xfrm6_local_error, }; int __init xfrm6_state_init(void) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index eb4a842..6f5fc61 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -214,5 +214,18 @@ int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) return inner_mode->afinfo->extract_output(x, skb); } +void xfrm_local_error(struct sk_buff *skb, int mtu) +{ + struct xfrm_state_afinfo *afinfo; + + afinfo = xfrm_state_get_afinfo(skb->sk->sk_family); + if (!afinfo) + return; + + afinfo->local_error(skb, mtu); + xfrm_state_put_afinfo(afinfo); +} + EXPORT_SYMBOL_GPL(xfrm_output); EXPORT_SYMBOL_GPL(xfrm_inner_extract_output); +EXPORT_SYMBOL_GPL(xfrm_local_error); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 78f66fa..54c0acd 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -39,9 +39,6 @@ static DEFINE_SPINLOCK(xfrm_state_lock); static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; -static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); - static inline unsigned int xfrm_dst_hash(struct net *net, const xfrm_address_t *daddr, const xfrm_address_t *saddr, @@ -1860,7 +1857,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); 
-static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) +struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) { struct xfrm_state_afinfo *afinfo; if (unlikely(family >= NPROTO)) @@ -1872,7 +1869,7 @@ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) return afinfo; } -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) +void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) { rcu_read_unlock(); } -- cgit v0.10.2 From 0ea9d5e3e0e03a63b11392f5613378977dae7eca Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 13 Aug 2013 04:35:58 +0200 Subject: xfrm: introduce helper for safe determination of mtu skb->sk socket can be of AF_INET or AF_INET6 address family. Thus we always have to make sure we a referring to the correct interpretation of skb->sk. We only depend on header defines to query the mtu, so we don't introduce a new dependency to ipv6 by this change. Cc: Steffen Klassert Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert diff --git a/include/net/route.h b/include/net/route.h index 2ea40c1..afdeeb5 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -317,4 +317,12 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) return hoplimit; } +static inline int ip_skb_dst_mtu(struct sk_buff *skb) +{ + struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; + + return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? + skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); +} + #endif /* _ROUTE_H */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e823786..b41d2d1 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -1723,4 +1724,15 @@ static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m) return ret; } +static inline int xfrm_skb_dst_mtu(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + if (sk && sk->sk_family == AF_INET6) + return ip6_skb_dst_mtu(skb); + else if (sk && sk->sk_family == AF_INET) + return ip_skb_dst_mtu(skb); + return dst_mtu(skb_dst(skb)); +} + #endif /* _NET_XFRM_H */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4bcabf3..9ee17e3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -211,14 +211,6 @@ static inline int ip_finish_output2(struct sk_buff *skb) return -EINVAL; } -static inline int ip_skb_dst_mtu(struct sk_buff *skb) -{ - struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; - - return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? 
- skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); -} - static int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 7a5491f..80baf4a 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -21,7 +21,6 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; - struct dst_entry *dst; if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) goto out; @@ -29,8 +28,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) goto out; - dst = skb_dst(skb); - mtu = dst_mtu(dst); + mtu = xfrm_skb_dst_mtu(skb); if (skb->len > mtu) { if (skb->sk) xfrm_local_error(skb, mtu); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index b64fff3..3ac5ab2 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -138,7 +138,10 @@ static int __xfrm6_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; - int mtu = ip6_skb_dst_mtu(skb); + int mtu = xfrm_skb_dst_mtu(skb); + + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); -- cgit v0.10.2 From 3f0fa9a808f98fa10a18ba2a73f13d65fda990fb Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Wed, 14 Aug 2013 16:05:02 -0700 Subject: regmap: Add another missing header for !CONFIG_REGMAP stubs The use of WARN_ON() needs the definitions from bug.h, without it you can get: include/linux/regmap.h: In function 'regmap_write': include/linux/regmap.h:525:2: error: implicit declaration of function 'WARN_ONCE' [-Werror=implicit-function-declaration] Signed-off-by: Kevin Hilman Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 580a532..6d91fcb 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -16,6 +16,7 @@ #include #include #include +#include struct module; struct device; -- cgit v0.10.2 From 44512449c0ab368889dd13ae0031fba74ee7e1d2 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Thu, 15 Aug 2013 15:36:49 -0500 Subject: jfs: fix readdir cookie incompatibility with NFSv4 NFSv4 reserves readdir cookie values 0-2 for special entries (. and ..), but jfs allows a value of 2 for a non-special entry. This incompatibility can result in the nfs client reporting a readdir loop. This patch doesn't change the value stored internally, but adds one to the value exposed to the iterate method. Signed-off-by: Dave Kleikamp Tested-by: Christian Kujau diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 8743ba9..984c2bb 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -3047,6 +3047,14 @@ int jfs_readdir(struct file *file, struct dir_context *ctx) dir_index = (u32) ctx->pos; + /* + * NFSv4 reserves cookies 1 and 2 for . and .. so the value + * we return to the vfs is one greater than the one we use + * internally. + */ + if (dir_index) + dir_index--; + if (dir_index > 1) { struct dir_table_slot dirtab_slot; @@ -3086,7 +3094,7 @@ int jfs_readdir(struct file *file, struct dir_context *ctx) if (p->header.flag & BT_INTERNAL) { jfs_err("jfs_readdir: bad index table"); DT_PUTPAGE(mp); - ctx->pos = -1; + ctx->pos = DIREND; return 0; } } else { @@ -3094,14 +3102,14 @@ int jfs_readdir(struct file *file, struct dir_context *ctx) /* * self "." */ - ctx->pos = 0; + ctx->pos = 1; if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) return 0; } /* * parent ".." 
*/ - ctx->pos = 1; + ctx->pos = 2; if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) return 0; @@ -3122,22 +3130,23 @@ int jfs_readdir(struct file *file, struct dir_context *ctx) /* * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 * - * pn = index = 0: First entry "." - * pn = 0; index = 1: Second entry ".." + * pn = 0; index = 1: First entry "." + * pn = 0; index = 2: Second entry ".." * pn > 0: Real entries, pn=1 -> leftmost page * pn = index = -1: No more entries */ dtpos = ctx->pos; - if (dtpos == 0) { + if (dtpos < 2) { /* build "." entry */ + ctx->pos = 1; if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) return 0; - dtoffset->index = 1; + dtoffset->index = 2; ctx->pos = dtpos; } if (dtoffset->pn == 0) { - if (dtoffset->index == 1) { + if (dtoffset->index == 2) { /* build ".." entry */ if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) return 0; @@ -3228,6 +3237,12 @@ int jfs_readdir(struct file *file, struct dir_context *ctx) } jfs_dirent->position = unique_pos++; } + /* + * We add 1 to the index because we may + * use a value of 2 internally, and NFSv4 + * doesn't like that. + */ + jfs_dirent->position++; } else { jfs_dirent->position = dtpos; len = min(d_namleft, DTLHDRDATALEN_LEGACY); -- cgit v0.10.2 From 884020bf3d2a3787a1cc6df902e98e0eec60330b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 6 Aug 2013 19:01:14 +0100 Subject: drm/i915: Invalidate TLBs for the rings after a reset After any "soft gfx reset" we must manually invalidate the TLBs associated with each ring. Empirically, it seems that a suspend/resume or D3-D0 cycle count as a "soft reset". The symptom is that the hardware would fail to note the new address for its status page, and so it would continue to write the shadow registers and breadcrumbs into the old physical address (now used by something completely different, scary). Whereas the driver would read the new status page and never see any progress, it would appear that the GPU hung immediately upon resume. Based on a patch by naresh kumar kachhi Reported-by: Thiago Macieira Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=64725 Signed-off-by: Chris Wilson Tested-by: Thiago Macieira Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6f51429..53cddd9 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -752,6 +752,8 @@ will not assert AGPBUSY# and will only be delivered when out of C3. 
*/ #define INSTPM_FORCE_ORDERING (1<<7) /* GEN6+ */ +#define INSTPM_TLB_INVALIDATE (1<<9) +#define INSTPM_SYNC_FLUSH (1<<5) #define ACTHD 0x020c8 #define FW_BLC 0x020d8 #define FW_BLC2 0x020dc diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 664118d..079ef01 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -968,6 +968,18 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring) I915_WRITE(mmio, (u32)ring->status_page.gfx_addr); POSTING_READ(mmio); + + /* Flush the TLB for this page */ + if (INTEL_INFO(dev)->gen >= 6) { + u32 reg = RING_INSTPM(ring->mmio_base); + I915_WRITE(reg, + _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | + INSTPM_SYNC_FLUSH)); + if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0, + 1000)) + DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n", + ring->name); + } } static int -- cgit v0.10.2 From 3d483058c8c8b87a167155ca9ddd776dd730bc39 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 18 Aug 2013 13:46:52 +0200 Subject: ipv6: wire up skb->encapsulation When pushing a new header before current one call skb_reset_inner_headers to record the position of the inner headers in the various ipv6 tunnel protocols. We later need this to correctly identify the addresses needed to send back an error in the xfrm layer. This change is safe, because skb->protocol is always checked before dereferencing data from the inner protocol. Cc: Steffen Klassert Cc: YOSHIFUJI Hideaki Cc: Nicolas Dichtel Acked-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index ecd6073..90747f1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -724,6 +724,11 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); } + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + skb_push(skb, gre_hlen); skb_reset_network_header(skb); skb_set_transport_header(skb, sizeof(*ipv6h)); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 1e55866..46ba243 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1027,6 +1027,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, init_tel_txopt(&opt, encap_limit); ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); } + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a3437a4..fbfc5a8 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -888,6 +888,11 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, ttl = iph6->hop_limit; tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, ttl, df); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); -- cgit v0.10.2 From 5d0ff542d0264f61dc4bdb34eba39ffb4ea3bc23 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 18 Aug 2013 13:46:57 +0200 Subject: ipv6: xfrm: dereference inner ipv6 header if encapsulated In xfrm6_local_error use inner_header if the packet was encapsulated. 
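In sketch form, the change below selects the inner header when an outer one has already been pushed:

	const struct ipv6hdr *hdr;

	hdr = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
	fl6.daddr = hdr->daddr;	/* report EMSGSIZE against the inner destination */
	ipv6_local_error(sk, EMSGSIZE, &fl6, mtu);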
Cc: Steffen Klassert Acked-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 3ac5ab2..e092e30 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -59,10 +59,12 @@ static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) void xfrm6_local_error(struct sk_buff *skb, u32 mtu) { struct flowi6 fl6; + const struct ipv6hdr *hdr; struct sock *sk = skb->sk; + hdr = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); fl6.fl6_dport = inet_sk(sk)->inet_dport; - fl6.daddr = ipv6_hdr(skb)->daddr; + fl6.daddr = hdr->daddr; ipv6_local_error(sk, EMSGSIZE, &fl6, mtu); } -- cgit v0.10.2 From 844d48746e4b281a933aedc0428048a1219b42f4 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 18 Aug 2013 13:47:01 +0200 Subject: xfrm: choose protocol family by skb protocol We need to choose the protocol family by skb->protocol. Otherwise we call the wrong xfrm{4,6}_local_error handler in case an ipv6 sockets is used in ipv4 mode, in which case we should call down to xfrm4_local_error (ip6 sockets are a superset of ip4 ones). We are called before before ip_output functions, so skb->protocol is not reset. Cc: Steffen Klassert Acked-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b41d2d1..ac5b025 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1728,9 +1728,9 @@ static inline int xfrm_skb_dst_mtu(struct sk_buff *skb) { struct sock *sk = skb->sk; - if (sk && sk->sk_family == AF_INET6) + if (sk && skb->protocol == htons(ETH_P_IPV6)) return ip6_skb_dst_mtu(skb); - else if (sk && sk->sk_family == AF_INET) + else if (sk && skb->protocol == htons(ETH_P_IP)) return ip_skb_dst_mtu(skb); return dst_mtu(skb_dst(skb)); } diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 6f5fc61..3bb2cdc 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -216,9 +216,17 @@ int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) void xfrm_local_error(struct sk_buff *skb, int mtu) { + unsigned int proto; struct xfrm_state_afinfo *afinfo; - afinfo = xfrm_state_get_afinfo(skb->sk->sk_family); + if (skb->protocol == htons(ETH_P_IP)) + proto = AF_INET; + else if (skb->protocol == htons(ETH_P_IPV6)) + proto = AF_INET6; + else + return; + + afinfo = xfrm_state_get_afinfo(proto); if (!afinfo) return; -- cgit v0.10.2 From 8ffff94d20b7eb446e848e0046107d51b17a20a8 Mon Sep 17 00:00:00 2001 From: Terry Suereth Date: Sat, 17 Aug 2013 15:53:12 -0400 Subject: libata: apply behavioral quirks to sil3826 PMP Fixing support for the Silicon Image 3826 port multiplier, by applying to it the same quirks applied to the Silicon Image 3726. Specifically fixes the repeated timeout/reset process which previously afflicted the 3726, as described from line 290. Slightly based on notes from: https://bugzilla.redhat.com/show_bug.cgi?id=890237 Signed-off-by: Terry Suereth Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c index 1c41722..20fd337 100644 --- a/drivers/ata/libata-pmp.c +++ b/drivers/ata/libata-pmp.c @@ -289,24 +289,24 @@ static int sata_pmp_configure(struct ata_device *dev, int print_info) /* Disable sending Early R_OK. 
* With "cached read" HDD testing and multiple ports busy on a SATA - * host controller, 3726 PMP will very rarely drop a deferred + * host controller, 3x26 PMP will very rarely drop a deferred * R_OK that was intended for the host. Symptom will be all * 5 drives under test will timeout, get reset, and recover. */ - if (vendor == 0x1095 && devid == 0x3726) { + if (vendor == 0x1095 && (devid == 0x3726 || devid == 0x3826)) { u32 reg; err_mask = sata_pmp_read(&ap->link, PMP_GSCR_SII_POL, ®); if (err_mask) { rc = -EIO; - reason = "failed to read Sil3726 Private Register"; + reason = "failed to read Sil3x26 Private Register"; goto fail; } reg &= ~0x1; err_mask = sata_pmp_write(&ap->link, PMP_GSCR_SII_POL, reg); if (err_mask) { rc = -EIO; - reason = "failed to write Sil3726 Private Register"; + reason = "failed to write Sil3x26 Private Register"; goto fail; } } @@ -383,8 +383,8 @@ static void sata_pmp_quirks(struct ata_port *ap) u16 devid = sata_pmp_gscr_devid(gscr); struct ata_link *link; - if (vendor == 0x1095 && devid == 0x3726) { - /* sil3726 quirks */ + if (vendor == 0x1095 && (devid == 0x3726 || devid == 0x3826)) { + /* sil3x26 quirks */ ata_for_each_link(link, ap, EDGE) { /* link reports offline after LPM */ link->flags |= ATA_LFLAG_NO_LPM; -- cgit v0.10.2 From d3d3835ce919438c00c5d1270d6f9d6ffea59d03 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 19 Aug 2013 20:05:50 +0200 Subject: ALSA: hda - Add inverted digital mic fixup for Acer Aspire One Yet another entry, just use the existing fixup for this machine, too. Reported-by: "Nathanael D. Noblet" Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f303cd8..389db4c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4336,6 +4336,7 @@ static const struct hda_fixup alc662_fixups[] = { static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1019, 0x9087, "ECS", ALC662_FIXUP_ASUS_MODE2), + SND_PCI_QUIRK(0x1025, 0x022f, "Acer Aspire One", ALC662_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x0308, "Acer Aspire 8942G", ALC662_FIXUP_ASPIRE), SND_PCI_QUIRK(0x1025, 0x031c, "Gateway NV79", ALC662_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x1025, 0x0349, "eMachines eM250", ALC662_FIXUP_INV_DMIC), -- cgit v0.10.2 From 71f42642af7b1bc7c36fb73ad8f7e7ab2ab8b2de Mon Sep 17 00:00:00 2001 From: Peter Meerwald Date: Sun, 18 Aug 2013 13:59:00 +0100 Subject: iio: adjd_s311: Fix non-scan mode data read forgot to convert channel index to data register Signed-off-by: Peter Meerwald Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/light/adjd_s311.c b/drivers/iio/light/adjd_s311.c index 5f4749e..c1cd569 100644 --- a/drivers/iio/light/adjd_s311.c +++ b/drivers/iio/light/adjd_s311.c @@ -232,7 +232,8 @@ static int adjd_s311_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - ret = adjd_s311_read_data(indio_dev, chan->address, val); + ret = adjd_s311_read_data(indio_dev, + ADJD_S311_DATA_REG(chan->address), val); if (ret < 0) return ret; return IIO_VAL_INT; -- cgit v0.10.2 From 7cb3be0a27805c625ff7cce20c53c926d9483243 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 16 Aug 2013 12:55:56 +0100 Subject: ARM: 7819/1: fiq: Cast the first argument of flush_icache_range() Commit 2ba85e7af4 (ARM: Fix FIQ code on VIVT CPUs) causes the following build warning: arch/arm/kernel/fiq.c:92:3: warning: passing argument 1 of 'cpu_cache.coherent_kern_range' makes integer from pointer without a cast [enabled by default] Cast it as '(unsigned 
long)base' to avoid the warning. Signed-off-by: Fabio Estevam Signed-off-by: Russell King diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c index fc79202..918875d 100644 --- a/arch/arm/kernel/fiq.c +++ b/arch/arm/kernel/fiq.c @@ -89,7 +89,8 @@ void set_fiq_handler(void *start, unsigned int length) memcpy(base + offset, start, length); if (!cache_is_vipt_nonaliasing()) - flush_icache_range(base + offset, offset + length); + flush_icache_range((unsigned long)base + offset, offset + + length); flush_icache_range(0xffff0000 + offset, 0xffff0000 + offset + length); } -- cgit v0.10.2 From 4f9b4fb7a2091eec339413a460b1665758401828 Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Wed, 14 Aug 2013 13:28:28 +0100 Subject: ARM: 7815/1: kexec: offline non panic CPUs on Kdump panic In case of normal kexec kernel load, all cpu's are offlined before calling machine_kexec().But in case crash panic cpus are relaxed in machine_crash_nonpanic_core() SMP function but not offlined. When crash kernel is loaded with kexec and on panic trigger machine_kexec() checks for number of cpus online. If more than one cpu is online machine_kexec() fails to load with below error kexec: error: multiple CPUs still online In machine_crash_nonpanic_core() SMP function, offline CPU before cpu_relax Signed-off-by: Vijaya Kumar K Acked-by: Stephen Warren Acked-by: Will Deacon Signed-off-by: Russell King diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index d7c82df..57221e3 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -82,6 +82,7 @@ void machine_crash_nonpanic_core(void *unused) crash_save_cpu(®s, smp_processor_id()); flush_cache_all(); + set_cpu_online(smp_processor_id(), false); atomic_dec(&waiting_for_crash_ipi); while (1) cpu_relax(); -- cgit v0.10.2 From ac124504ecf6b20a2457d873d0728a8b991a5b0c Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 14 Aug 2013 22:36:32 +0100 Subject: ARM: 7816/1: CONFIG_KUSER_HELPERS: fix help text Commit f6f91b0d9fd9 ("ARM: allow kuser helpers to be removed from the vector page") introduced some help text for the CONFIG_KUSER_HELPERS option which is rather contradictory. Let's fix that, and improve it a little. Cc: Signed-off-by: Nicolas Pitre Signed-off-by: Russell King diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index db5c2ca..cd2c88e 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -809,15 +809,18 @@ config KUSER_HELPERS the CPU type fitted to the system. This permits binaries to be run on ARMv4 through to ARMv7 without modification. + See Documentation/arm/kernel_user_helpers.txt for details. + However, the fixed address nature of these helpers can be used by ROP (return orientated programming) authors when creating exploits. If all of the binaries and libraries which run on your platform are built specifically for your platform, and make no use of - these helpers, then you can turn this option off. However, - when such an binary or library is run, it will receive a SIGILL - signal, which will terminate the program. + these helpers, then you can turn this option off to hinder + such exploits. However, in that case, if a binary or library + relying on those helpers is run, it will receive a SIGILL signal, + which will terminate the program. Say N here only if you are absolutely certain that you do not need these helpers; otherwise, the safe option is to say Y. 
-- cgit v0.10.2 From 99bbdfa6bdcb4bdf5be914a48e9b46941bf30819 Mon Sep 17 00:00:00 2001 From: Anthony Foiani Date: Mon, 19 Aug 2013 19:20:30 -0600 Subject: sata_fsl: save irqs while coalescing Before this patch, I was seeing the following lockdep splat on my MPC8315 (PPC32) target: [ 9.086051] ================================= [ 9.090393] [ INFO: inconsistent lock state ] [ 9.094744] 3.9.7-ajf-gc39503d #1 Not tainted [ 9.099087] --------------------------------- [ 9.103432] inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. [ 9.109431] scsi_eh_1/39 [HC1[1]:SC0[0]:HE0:SE1] takes: [ 9.114642] (&(&host->lock)->rlock){?.+...}, at: [] sata_fsl_interrupt+0x50/0x250 [ 9.123137] {HARDIRQ-ON-W} state was registered at: [ 9.128004] [] lock_acquire+0x90/0xf4 [ 9.132737] [] _raw_spin_lock+0x34/0x4c [ 9.137645] [] fsl_sata_set_irq_coalescing+0x68/0x100 [ 9.143750] [] sata_fsl_init_controller+0xa8/0xc0 [ 9.149505] [] sata_fsl_probe+0x17c/0x2e8 [ 9.154568] [] driver_probe_device+0x90/0x248 [ 9.159987] [] __driver_attach+0xc4/0xc8 [ 9.164964] [] bus_for_each_dev+0x5c/0xa8 [ 9.170028] [] bus_add_driver+0x100/0x26c [ 9.175091] [] driver_register+0x88/0x198 [ 9.180155] [] do_one_initcall+0x58/0x1b4 [ 9.185226] [] kernel_init_freeable+0x118/0x1c0 [ 9.190823] [] kernel_init+0x18/0x108 [ 9.195542] [] ret_from_kernel_thread+0x64/0x6c [ 9.201142] irq event stamp: 160 [ 9.204366] hardirqs last enabled at (159): [] _raw_spin_unlock_irq+0x30/0x50 [ 9.212469] hardirqs last disabled at (160): [] reenable_mmu+0x30/0x88 [ 9.219867] softirqs last enabled at (144): [] __do_softirq+0x168/0x218 [ 9.227435] softirqs last disabled at (137): [] irq_exit+0xa8/0xb4 [ 9.234481] [ 9.234481] other info that might help us debug this: [ 9.240995] Possible unsafe locking scenario: [ 9.240995] [ 9.246898] CPU0 [ 9.249337] ---- [ 9.251776] lock(&(&host->lock)->rlock); [ 9.255878] [ 9.258492] lock(&(&host->lock)->rlock); [ 9.262765] [ 9.262765] *** DEADLOCK *** [ 9.262765] [ 9.268684] no locks held by scsi_eh_1/39. 
[ 9.272767] [ 9.272767] stack backtrace: [ 9.277117] Call Trace: [ 9.279589] [cfff9da0] [c0008504] show_stack+0x48/0x150 (unreliable) [ 9.285972] [cfff9de0] [c0447d5c] print_usage_bug.part.35+0x268/0x27c [ 9.292425] [cfff9e10] [c006ace4] mark_lock+0x2ac/0x658 [ 9.297660] [cfff9e40] [c006b7e4] __lock_acquire+0x754/0x1840 [ 9.303414] [cfff9ee0] [c006cdb8] lock_acquire+0x90/0xf4 [ 9.308745] [cfff9f20] [c043ef04] _raw_spin_lock+0x34/0x4c [ 9.314250] [cfff9f30] [c02f4168] sata_fsl_interrupt+0x50/0x250 [ 9.320187] [cfff9f70] [c0079ff0] handle_irq_event_percpu+0x90/0x254 [ 9.326547] [cfff9fc0] [c007a1fc] handle_irq_event+0x48/0x78 [ 9.332220] [cfff9fe0] [c007c95c] handle_level_irq+0x9c/0x104 [ 9.337981] [cfff9ff0] [c000d978] call_handle_irq+0x18/0x28 [ 9.343568] [cc7139f0] [c000608c] do_IRQ+0xf0/0x1a8 [ 9.348464] [cc713a20] [c000fc8c] ret_from_except+0x0/0x14 [ 9.353983] --- Exception: 501 at _raw_spin_unlock_irq+0x40/0x50 [ 9.353983] LR = _raw_spin_unlock_irq+0x30/0x50 [ 9.364839] [cc713af0] [c043db10] wait_for_common+0xac/0x188 [ 9.370513] [cc713b30] [c02ddee4] ata_exec_internal_sg+0x2b0/0x4f0 [ 9.376699] [cc713be0] [c02de18c] ata_exec_internal+0x68/0xa8 [ 9.382454] [cc713c20] [c02de4b8] ata_dev_read_id+0x158/0x594 [ 9.388205] [cc713ca0] [c02ec244] ata_eh_recover+0xd88/0x13d0 [ 9.393962] [cc713d20] [c02f2520] sata_pmp_error_handler+0xc0/0x8ac [ 9.400234] [cc713dd0] [c02ecdc8] ata_scsi_port_error_handler+0x464/0x5e8 [ 9.407023] [cc713e10] [c02ecfd0] ata_scsi_error+0x84/0xb8 [ 9.412528] [cc713e40] [c02c4974] scsi_error_handler+0xd8/0x47c [ 9.418457] [cc713eb0] [c004737c] kthread+0xa8/0xac [ 9.423355] [cc713f40] [c000f6b8] ret_from_kernel_thread+0x64/0x6c This fix was suggested by Bhushan Bharat , and was discussed in email at: http://linuxppc.10917.n7.nabble.com/MPC8315-reboot-failure-lockdep-splat-possibly-related-tp75162.html Same patch successfully tested with 3.9.7. linux-next compiled but not tested on hardware. This patch is based off linux-next tag next-20130819 (which is commit 66a01bae29d11916c09f9f5a937cafe7d402e4a5 ) Signed-off-by: Anthony Foiani Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 19720a0..851bd3f 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -293,6 +293,7 @@ static void fsl_sata_set_irq_coalescing(struct ata_host *host, { struct sata_fsl_host_priv *host_priv = host->private_data; void __iomem *hcr_base = host_priv->hcr_base; + unsigned long flags; if (count > ICC_MAX_INT_COUNT_THRESHOLD) count = ICC_MAX_INT_COUNT_THRESHOLD; @@ -305,12 +306,12 @@ static void fsl_sata_set_irq_coalescing(struct ata_host *host, (count > ICC_MIN_INT_COUNT_THRESHOLD)) ticks = ICC_SAFE_INT_TICKS; - spin_lock(&host->lock); + spin_lock_irqsave(&host->lock, flags); iowrite32((count << 24 | ticks), hcr_base + ICC); intr_coalescing_count = count; intr_coalescing_ticks = ticks; - spin_unlock(&host->lock); + spin_unlock_irqrestore(&host->lock, flags); DPRINTK("interrupt coalescing, count = 0x%x, ticks = %x\n", intr_coalescing_count, intr_coalescing_ticks); -- cgit v0.10.2 From 6284bf41b97fb36ed96b664a3c23b6dc3661f5f9 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 9 Aug 2013 17:25:54 -0400 Subject: drm/nouveau/fb: fix null derefs in nv49 and nv4e init Commit dceef5d87 (drm/nouveau/fb: initialise vram controller as pfb sub-object) moved some code around and introduced these null derefs. pfb->ram is set to the new ram object outside of this ctor. 
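In sketch form, the fix below writes to the ram object being constructed instead of going through the still-NULL pfb->ram pointer:

	/* inside the ram object's own constructor, pfb->ram is not assigned yet */
	ram->size  = nv_rd32(pfb, 0x10020c) & 0xff000000;		/* was pfb->ram->size */
	ram->parts = (nv_rd32(pfb, 0x100200) & 0x00000003) + 1;	/* was pfb->ram->parts */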
Reported-by: Ronald Uitermark Tested-by: Ronald Uitermark Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv49.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv49.c index 19e3a9a..ab7ef0a 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv49.c +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv49.c @@ -40,15 +40,15 @@ nv49_ram_create(struct nouveau_object *parent, struct nouveau_object *engine, return ret; switch (pfb914 & 0x00000003) { - case 0x00000000: pfb->ram->type = NV_MEM_TYPE_DDR1; break; - case 0x00000001: pfb->ram->type = NV_MEM_TYPE_DDR2; break; - case 0x00000002: pfb->ram->type = NV_MEM_TYPE_GDDR3; break; + case 0x00000000: ram->type = NV_MEM_TYPE_DDR1; break; + case 0x00000001: ram->type = NV_MEM_TYPE_DDR2; break; + case 0x00000002: ram->type = NV_MEM_TYPE_GDDR3; break; case 0x00000003: break; } - pfb->ram->size = nv_rd32(pfb, 0x10020c) & 0xff000000; - pfb->ram->parts = (nv_rd32(pfb, 0x100200) & 0x00000003) + 1; - pfb->ram->tags = nv_rd32(pfb, 0x100320); + ram->size = nv_rd32(pfb, 0x10020c) & 0xff000000; + ram->parts = (nv_rd32(pfb, 0x100200) & 0x00000003) + 1; + ram->tags = nv_rd32(pfb, 0x100320); return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv4e.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv4e.c index 7192aa6..63a6aab 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv4e.c +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv4e.c @@ -38,8 +38,8 @@ nv4e_ram_create(struct nouveau_object *parent, struct nouveau_object *engine, if (ret) return ret; - pfb->ram->size = nv_rd32(pfb, 0x10020c) & 0xff000000; - pfb->ram->type = NV_MEM_TYPE_STOLEN; + ram->size = nv_rd32(pfb, 0x10020c) & 0xff000000; + ram->type = NV_MEM_TYPE_STOLEN; return 0; } -- cgit v0.10.2 From 52f9a4d71b5ba980ea72013191e907127bc44eeb Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 14 Aug 2013 10:38:53 +0200 Subject: drm/nouveau/ltcg: fix ltcg memory initialization after suspend Some registers were not initialized in init, this causes them to be uninitialized after suspend. 
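The approach, condensed from the diff below: keep computing and caching values in the constructor, but move the actual register writes into an ->init() hook so they are re-applied on resume. A simplified sketch (error handling and the base-class init call omitted):

static int
nvc0_ltcg_init(struct nouveau_object *object)
{
	struct nvc0_ltcg_priv *priv = (struct nvc0_ltcg_priv *)object;

	nv_mask(priv, 0x17e820, 0x00100000, 0x00000000); /* INTR_EN &= ~0x10 */
	nv_wr32(priv, 0x17e8d8, priv->part_nr);
	nv_wr32(priv, 0x17e8d4, priv->tag_base);	/* value cached by the ctor */
	return 0;
}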
Signed-off-by: Maarten Lankhorst Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c index bcca883..ee2a0be 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c @@ -30,8 +30,9 @@ struct nvc0_ltcg_priv { struct nouveau_ltcg base; u32 part_nr; u32 subp_nr; - struct nouveau_mm tags; u32 num_tags; + u32 tag_base; + struct nouveau_mm tags; struct nouveau_mm_node *tag_ram; }; @@ -117,10 +118,6 @@ nvc0_ltcg_init_tag_ram(struct nouveau_fb *pfb, struct nvc0_ltcg_priv *priv) u32 tag_size, tag_margin, tag_align; int ret; - nv_wr32(priv, 0x17e8d8, priv->part_nr); - if (nv_device(pfb)->card_type >= NV_E0) - nv_wr32(priv, 0x17e000, priv->part_nr); - /* tags for 1/4 of VRAM should be enough (8192/4 per GiB of VRAM) */ priv->num_tags = (pfb->ram->size >> 17) / 4; if (priv->num_tags > (1 << 17)) @@ -152,7 +149,7 @@ nvc0_ltcg_init_tag_ram(struct nouveau_fb *pfb, struct nvc0_ltcg_priv *priv) tag_base += tag_align - 1; ret = do_div(tag_base, tag_align); - nv_wr32(priv, 0x17e8d4, tag_base); + priv->tag_base = tag_base; } ret = nouveau_mm_init(&priv->tags, 0, priv->num_tags, 1); @@ -182,8 +179,6 @@ nvc0_ltcg_ctor(struct nouveau_object *parent, struct nouveau_object *engine, } priv->subp_nr = nv_rd32(priv, 0x17e8dc) >> 28; - nv_mask(priv, 0x17e820, 0x00100000, 0x00000000); /* INTR_EN &= ~0x10 */ - ret = nvc0_ltcg_init_tag_ram(pfb, priv); if (ret) return ret; @@ -209,13 +204,32 @@ nvc0_ltcg_dtor(struct nouveau_object *object) nouveau_ltcg_destroy(ltcg); } +static int +nvc0_ltcg_init(struct nouveau_object *object) +{ + struct nouveau_ltcg *ltcg = (struct nouveau_ltcg *)object; + struct nvc0_ltcg_priv *priv = (struct nvc0_ltcg_priv *)ltcg; + int ret; + + ret = nouveau_ltcg_init(ltcg); + if (ret) + return ret; + + nv_mask(priv, 0x17e820, 0x00100000, 0x00000000); /* INTR_EN &= ~0x10 */ + nv_wr32(priv, 0x17e8d8, priv->part_nr); + if (nv_device(ltcg)->card_type >= NV_E0) + nv_wr32(priv, 0x17e000, priv->part_nr); + nv_wr32(priv, 0x17e8d4, priv->tag_base); + return 0; +} + struct nouveau_oclass nvc0_ltcg_oclass = { .handle = NV_SUBDEV(LTCG, 0xc0), .ofuncs = &(struct nouveau_ofuncs) { .ctor = nvc0_ltcg_ctor, .dtor = nvc0_ltcg_dtor, - .init = _nouveau_ltcg_init, + .init = nvc0_ltcg_init, .fini = _nouveau_ltcg_fini, }, }; -- cgit v0.10.2 From 18902bbf06a93ea497be2f177a6323ed9e5f30be Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 12 Aug 2013 12:41:34 +0200 Subject: drm/nouveau/ltcg: fix allocating memory as free Allocating type=0 marks the memory as free. This allows the ltcg memory to be allocated twice. Add a BUG_ON in core/mm.c to prevent this ever happening again. 
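Condensed from the diff below: type 0 is how the allocator marks free space, so the tag-RAM reservation now passes a non-zero type and the allocator asserts on misuse (sketch):

	BUG_ON(!type);	/* added in nouveau_mm_head() and nouveau_mm_tail() */

	/* caller now reserves the tag RAM with a real, non-zero type */
	ret = nouveau_mm_tail(&pfb->vram, 1, tag_size, tag_size, 1, &priv->tag_ram);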
Signed-off-by: Maarten Lankhorst Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/core/core/mm.c b/drivers/gpu/drm/nouveau/core/core/mm.c index d829172..7a4e089 100644 --- a/drivers/gpu/drm/nouveau/core/core/mm.c +++ b/drivers/gpu/drm/nouveau/core/core/mm.c @@ -98,6 +98,8 @@ nouveau_mm_head(struct nouveau_mm *mm, u8 type, u32 size_max, u32 size_min, u32 splitoff; u32 s, e; + BUG_ON(!type); + list_for_each_entry(this, &mm->free, fl_entry) { e = this->offset + this->length; s = this->offset; @@ -162,6 +164,8 @@ nouveau_mm_tail(struct nouveau_mm *mm, u8 type, u32 size_max, u32 size_min, struct nouveau_mm_node *prev, *this, *next; u32 mask = align - 1; + BUG_ON(!type); + list_for_each_entry_reverse(this, &mm->free, fl_entry) { u32 e = this->offset + this->length; u32 s = this->offset; diff --git a/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c index ee2a0be..cce65cc 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c @@ -139,7 +139,7 @@ nvc0_ltcg_init_tag_ram(struct nouveau_fb *pfb, struct nvc0_ltcg_priv *priv) tag_size += tag_align; tag_size = (tag_size + 0xfff) >> 12; /* round up */ - ret = nouveau_mm_tail(&pfb->vram, 0, tag_size, tag_size, 1, + ret = nouveau_mm_tail(&pfb->vram, 1, tag_size, tag_size, 1, &priv->tag_ram); if (ret) { priv->num_tags = 0; -- cgit v0.10.2 From 0ff42c5af6031c776d6079d67cd2615121e62fb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Fri, 16 Aug 2013 14:57:07 +0200 Subject: drm/nouveau: fix reclocking on nv40 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 77145f1cbdf8d28b46ff8070ca749bad821e0774 was introduced error which cause that reclocking on nv40 not working anymore. There is missing assigment of return value from pll_calc to ret. 
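The fix in context (sketch): capture the return value so the following error check actually tests the result of the PLL calculation:

	ret = pclk->pll_calc(pclk, pll, clk, &coef);
	if (ret == 0)
		return -ERANGE;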
Signed-off-by: Pali Rohár Signed-off-by: Martin Peres Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/nv40_pm.c b/drivers/gpu/drm/nouveau/nv40_pm.c index 3af5bcd..625f80d 100644 --- a/drivers/gpu/drm/nouveau/nv40_pm.c +++ b/drivers/gpu/drm/nouveau/nv40_pm.c @@ -131,7 +131,7 @@ nv40_calc_pll(struct drm_device *dev, u32 reg, struct nvbios_pll *pll, if (clk < pll->vco1.max_freq) pll->vco2.max_freq = 0; - pclk->pll_calc(pclk, pll, clk, &coef); + ret = pclk->pll_calc(pclk, pll, clk, &coef); if (ret == 0) return -ERANGE; -- cgit v0.10.2 From 6ff8c76a566f823d796359a6c1d76b7668f1e34d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 21 Aug 2013 10:13:30 +1000 Subject: drm/nouveau/mc: fix race condition between constructor and request_irq() Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/mc.h b/drivers/gpu/drm/nouveau/core/include/subdev/mc.h index d550226..9d2cd20 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/mc.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/mc.h @@ -20,8 +20,8 @@ nouveau_mc(void *obj) return (void *)nv_device(obj)->subdev[NVDEV_SUBDEV_MC]; } -#define nouveau_mc_create(p,e,o,d) \ - nouveau_mc_create_((p), (e), (o), sizeof(**d), (void **)d) +#define nouveau_mc_create(p,e,o,m,d) \ + nouveau_mc_create_((p), (e), (o), (m), sizeof(**d), (void **)d) #define nouveau_mc_destroy(p) ({ \ struct nouveau_mc *pmc = (p); _nouveau_mc_dtor(nv_object(pmc)); \ }) @@ -33,7 +33,8 @@ nouveau_mc(void *obj) }) int nouveau_mc_create_(struct nouveau_object *, struct nouveau_object *, - struct nouveau_oclass *, int, void **); + struct nouveau_oclass *, const struct nouveau_mc_intr *, + int, void **); void _nouveau_mc_dtor(struct nouveau_object *); int _nouveau_mc_init(struct nouveau_object *); int _nouveau_mc_fini(struct nouveau_object *, bool); diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/base.c b/drivers/gpu/drm/nouveau/core/subdev/mc/base.c index 1c0330b..ec9cd6f 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/base.c @@ -80,7 +80,9 @@ _nouveau_mc_dtor(struct nouveau_object *object) int nouveau_mc_create_(struct nouveau_object *parent, struct nouveau_object *engine, - struct nouveau_oclass *oclass, int length, void **pobject) + struct nouveau_oclass *oclass, + const struct nouveau_mc_intr *intr_map, + int length, void **pobject) { struct nouveau_device *device = nv_device(parent); struct nouveau_mc *pmc; @@ -92,6 +94,8 @@ nouveau_mc_create_(struct nouveau_object *parent, struct nouveau_object *engine, if (ret) return ret; + pmc->intr_map = intr_map; + ret = request_irq(device->pdev->irq, nouveau_mc_intr, IRQF_SHARED, "nouveau", pmc); if (ret < 0) diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.c index 8c76971..64aa4ed 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.c @@ -50,12 +50,11 @@ nv04_mc_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nv04_mc_priv *priv; int ret; - ret = nouveau_mc_create(parent, engine, oclass, &priv); + ret = nouveau_mc_create(parent, engine, oclass, nv04_mc_intr, &priv); *pobject = nv_object(priv); if (ret) return ret; - priv->base.intr_map = nv04_mc_intr; return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c index 5191937..d989178 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c @@ 
-36,12 +36,11 @@ nv44_mc_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nv44_mc_priv *priv; int ret; - ret = nouveau_mc_create(parent, engine, oclass, &priv); + ret = nouveau_mc_create(parent, engine, oclass, nv04_mc_intr, &priv); *pobject = nv_object(priv); if (ret) return ret; - priv->base.intr_map = nv04_mc_intr; return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c index f25fc5f..2b1afe2 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c @@ -53,12 +53,11 @@ nv50_mc_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nv50_mc_priv *priv; int ret; - ret = nouveau_mc_create(parent, engine, oclass, &priv); + ret = nouveau_mc_create(parent, engine, oclass, nv50_mc_intr, &priv); *pobject = nv_object(priv); if (ret) return ret; - priv->base.intr_map = nv50_mc_intr; return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c index e82fd21..0d57b4d 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c @@ -54,12 +54,11 @@ nv98_mc_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nv98_mc_priv *priv; int ret; - ret = nouveau_mc_create(parent, engine, oclass, &priv); + ret = nouveau_mc_create(parent, engine, oclass, nv98_mc_intr, &priv); *pobject = nv_object(priv); if (ret) return ret; - priv->base.intr_map = nv98_mc_intr; return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c index c5da3ba..104175c 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c @@ -57,12 +57,11 @@ nvc0_mc_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nvc0_mc_priv *priv; int ret; - ret = nouveau_mc_create(parent, engine, oclass, &priv); + ret = nouveau_mc_create(parent, engine, oclass, nvc0_mc_intr, &priv); *pobject = nv_object(priv); if (ret) return ret; - priv->base.intr_map = nvc0_mc_intr; return 0; } -- cgit v0.10.2 From 78ae0ad403daf11cf63da86923d2b5dbeda3af8f Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 21 Aug 2013 11:30:36 +1000 Subject: drm/nv04/disp: fix framebuffer pin refcounting Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c index 0782bd2..6a13ffb 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c +++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c @@ -606,6 +606,24 @@ nv_crtc_mode_set_regs(struct drm_crtc *crtc, struct drm_display_mode * mode) regp->ramdac_a34 = 0x1; } +static int +nv_crtc_swap_fbs(struct drm_crtc *crtc, struct drm_framebuffer *old_fb) +{ + struct nv04_display *disp = nv04_display(crtc->dev); + struct nouveau_framebuffer *nvfb = nouveau_framebuffer(crtc->fb); + struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); + int ret; + + ret = nouveau_bo_pin(nvfb->nvbo, TTM_PL_FLAG_VRAM); + if (ret == 0) { + if (disp->image[nv_crtc->index]) + nouveau_bo_unpin(disp->image[nv_crtc->index]); + nouveau_bo_ref(nvfb->nvbo, &disp->image[nv_crtc->index]); + } + + return ret; +} + /** * Sets up registers for the given mode/adjusted_mode pair. 
* @@ -622,10 +640,15 @@ nv_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_device *dev = crtc->dev; struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); struct nouveau_drm *drm = nouveau_drm(dev); + int ret; NV_DEBUG(drm, "CTRC mode on CRTC %d:\n", nv_crtc->index); drm_mode_debug_printmodeline(adjusted_mode); + ret = nv_crtc_swap_fbs(crtc, old_fb); + if (ret) + return ret; + /* unlock must come after turning off FP_TG_CONTROL in output_prepare */ nv_lock_vga_crtc_shadow(dev, nv_crtc->index, -1); @@ -722,6 +745,7 @@ static void nv_crtc_commit(struct drm_crtc *crtc) static void nv_crtc_destroy(struct drm_crtc *crtc) { + struct nv04_display *disp = nv04_display(crtc->dev); struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); if (!nv_crtc) @@ -729,6 +753,10 @@ static void nv_crtc_destroy(struct drm_crtc *crtc) drm_crtc_cleanup(crtc); + if (disp->image[nv_crtc->index]) + nouveau_bo_unpin(disp->image[nv_crtc->index]); + nouveau_bo_ref(NULL, &disp->image[nv_crtc->index]); + nouveau_bo_unmap(nv_crtc->cursor.nvbo); nouveau_bo_unpin(nv_crtc->cursor.nvbo); nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo); @@ -754,6 +782,16 @@ nv_crtc_gamma_load(struct drm_crtc *crtc) } static void +nv_crtc_disable(struct drm_crtc *crtc) +{ + struct nv04_display *disp = nv04_display(crtc->dev); + struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); + if (disp->image[nv_crtc->index]) + nouveau_bo_unpin(disp->image[nv_crtc->index]); + nouveau_bo_ref(NULL, &disp->image[nv_crtc->index]); +} + +static void nv_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b, uint32_t start, uint32_t size) { @@ -791,7 +829,6 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc, struct drm_framebuffer *drm_fb; struct nouveau_framebuffer *fb; int arb_burst, arb_lwm; - int ret; NV_DEBUG(drm, "index %d\n", nv_crtc->index); @@ -801,10 +838,8 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc, return 0; } - /* If atomic, we want to switch to the fb we were passed, so - * now we update pointers to do that. (We don't pin; just - * assume we're already pinned and update the base address.) + * now we update pointers to do that. */ if (atomic) { drm_fb = passed_fb; @@ -812,17 +847,6 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc, } else { drm_fb = crtc->fb; fb = nouveau_framebuffer(crtc->fb); - /* If not atomic, we can go ahead and pin, and unpin the - * old fb we were passed. 
- */ - ret = nouveau_bo_pin(fb->nvbo, TTM_PL_FLAG_VRAM); - if (ret) - return ret; - - if (passed_fb) { - struct nouveau_framebuffer *ofb = nouveau_framebuffer(passed_fb); - nouveau_bo_unpin(ofb->nvbo); - } } nv_crtc->fb.offset = fb->nvbo->bo.offset; @@ -877,6 +901,9 @@ static int nv04_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb) { + int ret = nv_crtc_swap_fbs(crtc, old_fb); + if (ret) + return ret; return nv04_crtc_do_mode_set_base(crtc, old_fb, x, y, false); } @@ -1027,6 +1054,7 @@ static const struct drm_crtc_helper_funcs nv04_crtc_helper_funcs = { .mode_set_base = nv04_crtc_mode_set_base, .mode_set_base_atomic = nv04_crtc_mode_set_base_atomic, .load_lut = nv_crtc_gamma_load, + .disable = nv_crtc_disable, }; int diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.h b/drivers/gpu/drm/nouveau/dispnv04/disp.h index a0a031d..9928187 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.h +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.h @@ -81,6 +81,7 @@ struct nv04_display { uint32_t saved_vga_font[4][16384]; uint32_t dac_users[4]; struct nouveau_object *core; + struct nouveau_bo *image[2]; }; static inline struct nv04_display * diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 907d20e..a03e75d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -577,6 +577,9 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, ret = nv50_display_flip_next(crtc, fb, chan, 0); if (ret) goto fail_unreserve; + } else { + struct nv04_display *dispnv04 = nv04_display(dev); + nouveau_bo_ref(new_bo, &dispnv04->image[nouveau_crtc(crtc)->index]); } ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, &fence); -- cgit v0.10.2 From 1c09b195d37fa459844036f429a0f378e70c3db6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 21 Aug 2013 10:22:28 +0800 Subject: cpuset: fix a regression in validating config change It's not allowed to clear masks of a cpuset if there're tasks in it, but it's broken: # mkdir /cgroup/sub # echo 0 > /cgroup/sub/cpuset.cpus # echo 0 > /cgroup/sub/cpuset.mems # echo $$ > /cgroup/sub/tasks # echo > /cgroup/sub/cpuset.cpus (should fail) This bug was introduced by commit 88fa523bff295f1d60244a54833480b02f775152 ("cpuset: allow to move tasks to empty cpusets"). tj: Dropped temp bool variables and nestes the conditionals directly. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 010a008..ea1966d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -475,13 +475,17 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) /* * Cpusets with tasks - existing or newly being attached - can't - * have empty cpus_allowed or mems_allowed. + * be changed to have empty cpus_allowed or mems_allowed. */ ret = -ENOSPC; - if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress) && - (cpumask_empty(trial->cpus_allowed) && - nodes_empty(trial->mems_allowed))) - goto out; + if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress)) { + if (!cpumask_empty(cur->cpus_allowed) && + cpumask_empty(trial->cpus_allowed)) + goto out; + if (!nodes_empty(cur->mems_allowed) && + nodes_empty(trial->mems_allowed)) + goto out; + } ret = 0; out: -- cgit v0.10.2 From 4e67fb5f5e336250db944921e3c68057d6203034 Mon Sep 17 00:00:00 2001 From: David Jander Date: Wed, 21 Aug 2013 17:37:22 +0200 Subject: regmap: rbtree: Fix overlapping rbnodes. 
Avoid overlapping register regions by making the initial blklen of a new node 1. If a register write occurs to a yet uncached register, that is lower than but near an existing node's base_reg, a new node is created and it's blklen is set to an arbitrary value (sizeof(*rbnode)). That may cause this node to overlap with another node. Those nodes should be merged, but this merge doesn't happen yet, so this patch at least makes the initial blklen small enough to avoid hitting the wrong node, which may otherwise lead to severe breakage. Signed-off-by: David Jander Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index 5c1435c..0fccc99 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -332,7 +332,7 @@ regcache_rbtree_node_alloc(struct regmap *map, unsigned int reg) } if (!rbnode->blklen) { - rbnode->blklen = sizeof(*rbnode); + rbnode->blklen = 1; rbnode->base_reg = reg; } -- cgit v0.10.2 From f5944daa0a72316077435c18a6571e73ed338332 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 8 Aug 2013 17:47:34 +1000 Subject: [SCSI] lpfc: Don't force CONFIG_GENERIC_CSUM on We want ppc64 to be able to select between optimised assembly checksum routines in big endian and the generic lib/checksum.c routines in little endian. The lpfc driver is forcing CONFIG_GENERIC_CSUM on which means we are unable to make the decision to enable it in the arch Kconfig. If the option exists it is always forced on. This got introduced in 3.10 via commit 6a7252fdb0c3 ([SCSI] lpfc: fix up Kconfig dependencies). I spoke to Randy about it and the original issue was with CRC_T10DIF not being defined. As such, remove the select of CONFIG_GENERIC_CSUM. Signed-off-by: Anton Blanchard Cc: # 3.10 Signed-off-by: James Bottomley diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 48b2918..92ff027 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1353,7 +1353,6 @@ config SCSI_LPFC tristate "Emulex LightPulse Fibre Channel Support" depends on PCI && SCSI select SCSI_FC_ATTRS - select GENERIC_CSUM select CRC_T10DIF help This lpfc driver supports the Emulex LightPulse -- cgit v0.10.2 From 35dc248383bbab0a7203fca4d722875bc81ef091 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 5 Aug 2013 17:55:01 -0700 Subject: [SCSI] sg: Fix user memory corruption when SG_IO is interrupted by a signal There is a nasty bug in the SCSI SG_IO ioctl that in some circumstances leads to one process writing data into the address space of some other random unrelated process if the ioctl is interrupted by a signal. What happens is the following: - A process issues an SG_IO ioctl with direction DXFER_FROM_DEV (ie the underlying SCSI command will transfer data from the SCSI device to the buffer provided in the ioctl) - Before the command finishes, a signal is sent to the process waiting in the ioctl. This will end up waking up the sg_ioctl() code: result = wait_event_interruptible(sfp->read_wait, (srp_done(sfp, srp) || sdp->detached)); but neither srp_done() nor sdp->detached is true, so we end up just setting srp->orphan and returning to userspace: srp->orphan = 1; write_unlock_irq(&sfp->rq_list_lock); return result; /* -ERESTARTSYS because signal hit process */ At this point the original process is done with the ioctl and blithely goes ahead handling the signal, reissuing the ioctl, etc. - Eventually, the SCSI command issued by the first ioctl finishes and ends up in sg_rq_end_io(). 
At the end of that function, we run through: write_lock_irqsave(&sfp->rq_list_lock, iflags); if (unlikely(srp->orphan)) { if (sfp->keep_orphan) srp->sg_io_owned = 0; else done = 0; } srp->done = done; write_unlock_irqrestore(&sfp->rq_list_lock, iflags); if (likely(done)) { /* Now wake up any sg_read() that is waiting for this * packet. */ wake_up_interruptible(&sfp->read_wait); kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN); kref_put(&sfp->f_ref, sg_remove_sfp); } else { INIT_WORK(&srp->ew.work, sg_rq_end_io_usercontext); schedule_work(&srp->ew.work); } Since srp->orphan *is* set, we set done to 0 (assuming the userspace app has not set keep_orphan via an SG_SET_KEEP_ORPHAN ioctl), and therefore we end up scheduling sg_rq_end_io_usercontext() to run in a workqueue. - In workqueue context we go through sg_rq_end_io_usercontext() -> sg_finish_rem_req() -> blk_rq_unmap_user() -> ... -> bio_uncopy_user() -> __bio_copy_iov() -> copy_to_user(). The key point here is that we are doing copy_to_user() on a workqueue -- that is, we're on a kernel thread with current->mm equal to whatever random previous user process was scheduled before this kernel thread. So we end up copying whatever data the SCSI command returned to the virtual address of the buffer passed into the original ioctl, but it's quite likely we do this copying into a different address space! As suggested by James Bottomley , add a check for current->mm (which is NULL if we're on a kernel thread without a real userspace address space) in bio_uncopy_user(), and skip the copy if we're on a kernel thread. There's no reason that I can think of for any caller of bio_uncopy_user() to want to do copying on a kernel thread with a random active userspace address space. Huge thanks to Costa Sapuntzakis for the original pointer to this bug in the sg code. Signed-off-by: Roland Dreier Tested-by: David Milburn Cc: Jens Axboe Cc: Signed-off-by: James Bottomley diff --git a/fs/bio.c b/fs/bio.c index 94bbc04..c5eae72 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1045,12 +1045,22 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, int bio_uncopy_user(struct bio *bio) { struct bio_map_data *bmd = bio->bi_private; - int ret = 0; + struct bio_vec *bvec; + int ret = 0, i; - if (!bio_flagged(bio, BIO_NULL_MAPPED)) - ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, - bmd->nr_sgvecs, bio_data_dir(bio) == READ, - 0, bmd->is_our_pages); + if (!bio_flagged(bio, BIO_NULL_MAPPED)) { + /* + * if we're in a workqueue, the request is orphaned, so + * don't copy into a random user address space, just free. + */ + if (current->mm) + ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, + bmd->nr_sgvecs, bio_data_dir(bio) == READ, + 0, bmd->is_our_pages); + else if (bmd->is_our_pages) + bio_for_each_segment_all(bvec, bio, i) + __free_page(bvec->bv_page); + } bio_free_map_data(bmd); bio_put(bio); return ret; -- cgit v0.10.2 From 6f1e120438144a47463b80c3e113334a6bd0e77c Mon Sep 17 00:00:00 2001 From: Guillaume Clement Date: Sat, 10 Aug 2013 09:06:04 +0200 Subject: gma500: Fix SDVO turning off randomly Some Poulsbo cards seem to incorrectly report SDVO_CMD_STATUS_TARGET_NOT_SPECIFIED instead of SDVO_CMD_STATUS_PENDING, which causes the display to be turned off. 
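Condensed, the fix makes the command-status poll treat the bogus status the same way as a pending one (sketch of the resulting loop, details trimmed):

	while ((status == SDVO_CMD_STATUS_PENDING ||
	        status == SDVO_CMD_STATUS_TARGET_NOT_SPECIFIED) && retry--) {
		udelay(15);
		if (!psb_intel_sdvo_read_byte(psb_intel_sdvo,
					      SDVO_I2C_CMD_STATUS, &status))
			goto log_fail;
	}

so the command keeps being re-polled instead of being reported as failed and the output torn down.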
Signed-off-by: Guillaume Clement Acked-by: Patrik Jakobsson Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c index 19e3660..3bc84145 100644 --- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c +++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c @@ -500,7 +500,8 @@ static bool psb_intel_sdvo_read_response(struct psb_intel_sdvo *psb_intel_sdvo, &status)) goto log_fail; - while (status == SDVO_CMD_STATUS_PENDING && retry--) { + while ((status == SDVO_CMD_STATUS_PENDING || + status == SDVO_CMD_STATUS_TARGET_NOT_SPECIFIED) && retry--) { udelay(15); if (!psb_intel_sdvo_read_byte(psb_intel_sdvo, SDVO_I2C_CMD_STATUS, -- cgit v0.10.2 From d79ff142624e1be080ad8d09101f7004d79c36e1 Mon Sep 17 00:00:00 2001 From: Martin Peschke Date: Thu, 22 Aug 2013 17:45:36 +0200 Subject: [SCSI] zfcp: fix lock imbalance by reworking request queue locking This patch adds wait_event_interruptible_lock_irq_timeout(), which is a straight-forward descendant of wait_event_interruptible_timeout() and wait_event_interruptible_lock_irq(). The zfcp driver used to call wait_event_interruptible_timeout() in combination with some intricate and error-prone locking. Using wait_event_interruptible_lock_irq_timeout() as a replacement nicely cleans up that locking. This rework removes a situation that resulted in a locking imbalance in zfcp_qdio_sbal_get(): BUG: workqueue leaked lock or atomic: events/1/0xffffff00/10 last function: zfcp_fc_wka_port_offline+0x0/0xa0 [zfcp] It was introduced by commit c2af7545aaff3495d9bf9a7608c52f0af86fb194 "[SCSI] zfcp: Do not wait for SBALs on stopped queue", which had a new code path related to ZFCP_STATUS_ADAPTER_QDIOUP that took an early exit without a required lock being held. The problem occured when a special, non-SCSI I/O request was being submitted in process context, when the adapter's queues had been torn down. In this case the bug surfaced when the Fibre Channel port connection for a well-known address was closed during a concurrent adapter shut-down procedure, which is a rare constellation. This patch also fixes these warnings from the sparse tool (make C=1): drivers/s390/scsi/zfcp_qdio.c:224:12: warning: context imbalance in 'zfcp_qdio_sbal_check' - wrong count at exit drivers/s390/scsi/zfcp_qdio.c:244:5: warning: context imbalance in 'zfcp_qdio_sbal_get' - unexpected unlock Last but not least, we get rid of that crappy lock-unlock-lock sequence at the beginning of the critical section. It is okay to call zfcp_erp_adapter_reopen() with req_q_lock held. 
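For reference, the intended usage pattern of the new helper is roughly the following (simplified sketch; the queue structure, resources_available() condition and timeout are placeholders, not the exact zfcp code):

	spin_lock_irq(&q->lock);
	...
	ret = wait_event_interruptible_lock_irq_timeout(q->wq,
				resources_available(q),
				q->lock, 5 * HZ);
	/* q->lock is held again here:
	 *   ret == 0            timeout elapsed
	 *   ret == -ERESTARTSYS a signal was caught
	 *   ret > 0             condition true, remaining jiffies
	 */
	...
	spin_unlock_irq(&q->lock);

That is, the caller enters and leaves with the lock held, and the macro drops and re-takes it around schedule_timeout() internally.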
Reported-by: Mikulas Patocka Reported-by: Heiko Carstens Signed-off-by: Martin Peschke Cc: stable@vger.kernel.org #2.6.35+ Signed-off-by: Steffen Maier Signed-off-by: James Bottomley diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index 665e3cf..de0598e 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -224,11 +224,9 @@ int zfcp_qdio_sbals_from_sg(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req, static int zfcp_qdio_sbal_check(struct zfcp_qdio *qdio) { - spin_lock_irq(&qdio->req_q_lock); if (atomic_read(&qdio->req_q_free) || !(atomic_read(&qdio->adapter->status) & ZFCP_STATUS_ADAPTER_QDIOUP)) return 1; - spin_unlock_irq(&qdio->req_q_lock); return 0; } @@ -246,9 +244,8 @@ int zfcp_qdio_sbal_get(struct zfcp_qdio *qdio) { long ret; - spin_unlock_irq(&qdio->req_q_lock); - ret = wait_event_interruptible_timeout(qdio->req_q_wq, - zfcp_qdio_sbal_check(qdio), 5 * HZ); + ret = wait_event_interruptible_lock_irq_timeout(qdio->req_q_wq, + zfcp_qdio_sbal_check(qdio), qdio->req_q_lock, 5 * HZ); if (!(atomic_read(&qdio->adapter->status) & ZFCP_STATUS_ADAPTER_QDIOUP)) return -EIO; @@ -262,7 +259,6 @@ int zfcp_qdio_sbal_get(struct zfcp_qdio *qdio) zfcp_erp_adapter_reopen(qdio->adapter, 0, "qdsbg_1"); } - spin_lock_irq(&qdio->req_q_lock); return -EIO; } diff --git a/include/linux/wait.h b/include/linux/wait.h index f487a47..a67fc16 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -811,6 +811,63 @@ do { \ __ret; \ }) +#define __wait_event_interruptible_lock_irq_timeout(wq, condition, \ + lock, ret) \ +do { \ + DEFINE_WAIT(__wait); \ + \ + for (;;) { \ + prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \ + if (condition) \ + break; \ + if (signal_pending(current)) { \ + ret = -ERESTARTSYS; \ + break; \ + } \ + spin_unlock_irq(&lock); \ + ret = schedule_timeout(ret); \ + spin_lock_irq(&lock); \ + if (!ret) \ + break; \ + } \ + finish_wait(&wq, &__wait); \ +} while (0) + +/** + * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets true or a timeout elapses. + * The condition is checked under the lock. This is expected + * to be called with the lock taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before schedule() + * and reacquired afterwards. + * @timeout: timeout, in jiffies + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or signal is received. The @condition is + * checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before going to sleep and is reacquired afterwards. + * + * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it + * was interrupted by a signal, and the remaining jiffies otherwise + * if the condition evaluated to true before the timeout elapsed. + */ +#define wait_event_interruptible_lock_irq_timeout(wq, condition, lock, \ + timeout) \ +({ \ + int __ret = timeout; \ + \ + if (!(condition)) \ + __wait_event_interruptible_lock_irq_timeout( \ + wq, condition, lock, __ret); \ + __ret; \ +}) + /* * These are the old interfaces to sleep waiting for an event. 
-- cgit v0.10.2 From 924dd584b198a58aa7cb3efefd8a03326550ce8f Mon Sep 17 00:00:00 2001 From: Martin Peschke Date: Thu, 22 Aug 2013 17:45:37 +0200 Subject: [SCSI] zfcp: fix schedule-inside-lock in scsi_device list loops BUG: sleeping function called from invalid context at kernel/workqueue.c:2752 in_atomic(): 1, irqs_disabled(): 1, pid: 360, name: zfcperp0.0.1700 CPU: 1 Not tainted 3.9.3+ #69 Process zfcperp0.0.1700 (pid: 360, task: 0000000075b7e080, ksp: 000000007476bc30) Call Trace: ([<00000000001165de>] show_trace+0x106/0x154) [<00000000001166a0>] show_stack+0x74/0xf4 [<00000000006ff646>] dump_stack+0xc6/0xd4 [<000000000017f3a0>] __might_sleep+0x128/0x148 [<000000000015ece8>] flush_work+0x54/0x1f8 [<00000000001630de>] __cancel_work_timer+0xc6/0x128 [<00000000005067ac>] scsi_device_dev_release_usercontext+0x164/0x23c [<0000000000161816>] execute_in_process_context+0x96/0xa8 [<00000000004d33d8>] device_release+0x60/0xc0 [<000000000048af48>] kobject_release+0xa8/0x1c4 [<00000000004f4bf2>] __scsi_iterate_devices+0xfa/0x130 [<000003ff801b307a>] zfcp_erp_strategy+0x4da/0x1014 [zfcp] [<000003ff801b3caa>] zfcp_erp_thread+0xf6/0x2b0 [zfcp] [<000000000016b75a>] kthread+0xf2/0xfc [<000000000070c9de>] kernel_thread_starter+0x6/0xc [<000000000070c9d8>] kernel_thread_starter+0x0/0xc Apparently, the ref_count for some scsi_device drops down to zero, triggering device removal through execute_in_process_context(), while the lldd error recovery thread iterates through a scsi device list. Unfortunately, execute_in_process_context() decides to immediately execute that device removal function, instead of scheduling asynchronous execution, since it detects process context and thinks it is safe to do so. But almost all calls to shost_for_each_device() in our lldd are inside spin_lock_irq, even in thread context. Obviously, schedule() inside spin_lock_irq sections is a bad idea. Change the lldd to use the proper iterator function, __shost_for_each_device(), in combination with required locking. Occurences that need to be changed include all calls in zfcp_erp.c, since those might be executed in zfcp error recovery thread context with a lock held. Other occurences of shost_for_each_device() in zfcp_fsf.c do not need to be changed (no process context, no surrounding locking). The problem was introduced in Linux 2.6.37 by commit b62a8d9b45b971a67a0f8413338c230e3117dff5 "[SCSI] zfcp: Use SCSI device data zfcp_scsi_dev instead of zfcp_unit". 
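The conversion applied below follows one pattern throughout zfcp_erp.c (sketch; do_something() stands in for the per-device work):

	/* before: the iterator takes/drops device references and can
	 * end up sleeping when the last reference is dropped */
	shost_for_each_device(sdev, shost)
		do_something(sdev);

	/* after: plain list walk, protected by the host lock */
	spin_lock_irqsave(shost->host_lock, flags);
	__shost_for_each_device(sdev, shost)
		do_something(sdev);
	spin_unlock_irqrestore(shost->host_lock, flags);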
Reported-by: Christian Borntraeger Signed-off-by: Martin Peschke Cc: stable@vger.kernel.org #2.6.37+ Signed-off-by: Steffen Maier Signed-off-by: James Bottomley diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 1d4c8fe..c82fe65 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -102,10 +102,13 @@ static void zfcp_erp_action_dismiss_port(struct zfcp_port *port) if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_INUSE) zfcp_erp_action_dismiss(&port->erp_action); - else - shost_for_each_device(sdev, port->adapter->scsi_host) + else { + spin_lock(port->adapter->scsi_host->host_lock); + __shost_for_each_device(sdev, port->adapter->scsi_host) if (sdev_to_zfcp(sdev)->port == port) zfcp_erp_action_dismiss_lun(sdev); + spin_unlock(port->adapter->scsi_host->host_lock); + } } static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter) @@ -592,9 +595,11 @@ static void _zfcp_erp_lun_reopen_all(struct zfcp_port *port, int clear, { struct scsi_device *sdev; - shost_for_each_device(sdev, port->adapter->scsi_host) + spin_lock(port->adapter->scsi_host->host_lock); + __shost_for_each_device(sdev, port->adapter->scsi_host) if (sdev_to_zfcp(sdev)->port == port) _zfcp_erp_lun_reopen(sdev, clear, id, 0); + spin_unlock(port->adapter->scsi_host->host_lock); } static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act) @@ -1434,8 +1439,10 @@ void zfcp_erp_set_adapter_status(struct zfcp_adapter *adapter, u32 mask) atomic_set_mask(common_mask, &port->status); read_unlock_irqrestore(&adapter->port_list_lock, flags); - shost_for_each_device(sdev, adapter->scsi_host) + spin_lock_irqsave(adapter->scsi_host->host_lock, flags); + __shost_for_each_device(sdev, adapter->scsi_host) atomic_set_mask(common_mask, &sdev_to_zfcp(sdev)->status); + spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags); } /** @@ -1469,11 +1476,13 @@ void zfcp_erp_clear_adapter_status(struct zfcp_adapter *adapter, u32 mask) } read_unlock_irqrestore(&adapter->port_list_lock, flags); - shost_for_each_device(sdev, adapter->scsi_host) { + spin_lock_irqsave(adapter->scsi_host->host_lock, flags); + __shost_for_each_device(sdev, adapter->scsi_host) { atomic_clear_mask(common_mask, &sdev_to_zfcp(sdev)->status); if (clear_counter) atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0); } + spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags); } /** @@ -1487,16 +1496,19 @@ void zfcp_erp_set_port_status(struct zfcp_port *port, u32 mask) { struct scsi_device *sdev; u32 common_mask = mask & ZFCP_COMMON_FLAGS; + unsigned long flags; atomic_set_mask(mask, &port->status); if (!common_mask) return; - shost_for_each_device(sdev, port->adapter->scsi_host) + spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags); + __shost_for_each_device(sdev, port->adapter->scsi_host) if (sdev_to_zfcp(sdev)->port == port) atomic_set_mask(common_mask, &sdev_to_zfcp(sdev)->status); + spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags); } /** @@ -1511,6 +1523,7 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask) struct scsi_device *sdev; u32 common_mask = mask & ZFCP_COMMON_FLAGS; u32 clear_counter = mask & ZFCP_STATUS_COMMON_ERP_FAILED; + unsigned long flags; atomic_clear_mask(mask, &port->status); @@ -1520,13 +1533,15 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask) if (clear_counter) atomic_set(&port->erp_counter, 0); - shost_for_each_device(sdev, port->adapter->scsi_host) + spin_lock_irqsave(port->adapter->scsi_host->host_lock, 
flags); + __shost_for_each_device(sdev, port->adapter->scsi_host) if (sdev_to_zfcp(sdev)->port == port) { atomic_clear_mask(common_mask, &sdev_to_zfcp(sdev)->status); if (clear_counter) atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0); } + spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags); } /** -- cgit v0.10.2 From b5dc3c4800cc5c2c0b3c93a97eb4c7afa0aae49a Mon Sep 17 00:00:00 2001 From: Martin Peschke Date: Thu, 22 Aug 2013 17:45:38 +0200 Subject: [SCSI] zfcp: remove access control tables interface (keep sysfs files) By popular demand, this patch brings back a couple of sysfs attributes removed by commit 663e0890e31cb85f0cca5ac1faaee0d2d52880b5 "[SCSI] zfcp: remove access control tables interface". The content has been irrelevant for years, but the files must be there forever for whatever user space tools that may rely on them. Since these files always return a constant value, a new stripped down show-macro was required. Otherwise build warnings would have been introduced. Signed-off-by: Martin Peschke Signed-off-by: Steffen Maier Signed-off-by: James Bottomley diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c index 3f01bbf..8906392 100644 --- a/drivers/s390/scsi/zfcp_sysfs.c +++ b/drivers/s390/scsi/zfcp_sysfs.c @@ -27,6 +27,16 @@ static ssize_t zfcp_sysfs_##_feat##_##_name##_show(struct device *dev, \ static ZFCP_DEV_ATTR(_feat, _name, S_IRUGO, \ zfcp_sysfs_##_feat##_##_name##_show, NULL); +#define ZFCP_DEFINE_ATTR_CONST(_feat, _name, _format, _value) \ +static ssize_t zfcp_sysfs_##_feat##_##_name##_show(struct device *dev, \ + struct device_attribute *at,\ + char *buf) \ +{ \ + return sprintf(buf, _format, _value); \ +} \ +static ZFCP_DEV_ATTR(_feat, _name, S_IRUGO, \ + zfcp_sysfs_##_feat##_##_name##_show, NULL); + #define ZFCP_DEFINE_A_ATTR(_name, _format, _value) \ static ssize_t zfcp_sysfs_adapter_##_name##_show(struct device *dev, \ struct device_attribute *at,\ @@ -75,6 +85,8 @@ ZFCP_DEFINE_ATTR(zfcp_unit, unit, in_recovery, "%d\n", ZFCP_DEFINE_ATTR(zfcp_unit, unit, access_denied, "%d\n", (zfcp_unit_sdev_status(unit) & ZFCP_STATUS_COMMON_ACCESS_DENIED) != 0); +ZFCP_DEFINE_ATTR_CONST(unit, access_shared, "%d\n", 0); +ZFCP_DEFINE_ATTR_CONST(unit, access_readonly, "%d\n", 0); static ssize_t zfcp_sysfs_port_failed_show(struct device *dev, struct device_attribute *attr, @@ -347,6 +359,8 @@ static struct attribute *zfcp_unit_attrs[] = { &dev_attr_unit_in_recovery.attr, &dev_attr_unit_status.attr, &dev_attr_unit_access_denied.attr, + &dev_attr_unit_access_shared.attr, + &dev_attr_unit_access_readonly.attr, NULL }; static struct attribute_group zfcp_unit_attr_group = { -- cgit v0.10.2 From 2ca320e294a738c9134a71b5029de05edbfc7aad Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 22 Aug 2013 09:55:36 +0200 Subject: ALSA: hda - Fix NULL dereference with CONFIG_SND_DYNAMIC_MINORS=n Without the dynamic minor assignment, HDMI codec may have less PCM instances than the number of pins, which eventually leads to Oops. 
Reported-by: Stratos Karafotis Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 030ca86..9f35862 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1781,6 +1781,9 @@ static int generic_hdmi_build_controls(struct hda_codec *codec) struct snd_pcm_chmap *chmap; struct snd_kcontrol *kctl; int i; + + if (!codec->pcm_info[pin_idx].pcm) + break; err = snd_pcm_add_chmap_ctls(codec->pcm_info[pin_idx].pcm, SNDRV_PCM_STREAM_PLAYBACK, NULL, 0, pin_idx, &chmap); -- cgit v0.10.2 From 168cf0eca45b86014b8c2a17fcb0673ab1af809b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Aug 2013 23:31:38 +0200 Subject: Revert "ACPI / video: Always call acpi_video_init_brightness() on init" Revert commit c04c697 (ACPI / video: Always call acpi_video_init_brightness() on init), because it breaks eDP backlight at 1920x1080 on Acer Aspire S3 for Trevor Bortins. References: https://bugs.freedesktop.org/show_bug.cgi?id=68355 Reported-and-bisected-by: Trevor Bortins Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index e1284b8..3270d3c 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -908,9 +908,6 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device) device->cap._DDC = 1; } - if (acpi_video_init_brightness(device)) - return; - if (acpi_video_backlight_support()) { struct backlight_properties props; struct pci_dev *pdev; @@ -920,6 +917,9 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device) static int count = 0; char *name; + result = acpi_video_init_brightness(device); + if (result) + return; name = kasprintf(GFP_KERNEL, "acpi_video%d", count); if (!name) return; @@ -979,11 +979,6 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device) if (result) printk(KERN_ERR PREFIX "Create sysfs link\n"); - } else { - /* Remove the brightness object. */ - kfree(device->brightness->levels); - kfree(device->brightness); - device->brightness = NULL; } } -- cgit v0.10.2 From 3955dfa8216f712bc204a5ad2f4e51efff252fde Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 23 Aug 2013 12:37:17 +0100 Subject: staging: comedi: bug-fix NULL pointer dereference on failed attach Commit dcd7b8bd63cb81c5b973bf86510ca3c80bbbd162 ("staging: comedi: put module _after_ detach" by myself) reversed a couple of calls in `comedi_device_attach()` when recovering from an error returned by the low-level driver's 'attach' handler. Unfortunately, that introduced a NULL pointer dereference bug as `dev->driver` is NULL after the call to `comedi_device_detach()`. We still have a pointer to the low-level comedi driver structure in the `driv` variable, so use that instead. Signed-off-by: Ian Abbott Cc: # 3.10+ Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/comedi/drivers.c b/drivers/staging/comedi/drivers.c index e25eba5..b3b5125 100644 --- a/drivers/staging/comedi/drivers.c +++ b/drivers/staging/comedi/drivers.c @@ -482,7 +482,7 @@ int comedi_device_attach(struct comedi_device *dev, struct comedi_devconfig *it) ret = comedi_device_postconfig(dev); if (ret < 0) { comedi_device_detach(dev); - module_put(dev->driver->module); + module_put(driv->module); } /* On success, the driver module count has been incremented. 
*/ return ret; -- cgit v0.10.2 From 9a11899c5e699a8d2551692dfcd4372e39dcbdf6 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 21 Aug 2013 10:33:17 -0400 Subject: USB: OHCI: add missing PCI PM callbacks to ohci-pci.c Commit c1117afb8589 (USB: OHCI: make ohci-pci a separate driver) neglected to preserve the entries for the pci_suspend and pci_resume driver callbacks. As a result, OHCI controllers don't work properly during suspend and after hibernation. This patch adds the missing callbacks to the driver. Signed-off-by: Alan Stern Reported-and-tested-by: Steve Cotton Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index 08613e2..0f1d193 100644 --- a/drivers/usb/host/ohci-pci.c +++ b/drivers/usb/host/ohci-pci.c @@ -304,6 +304,11 @@ static int __init ohci_pci_init(void) pr_info("%s: " DRIVER_DESC "\n", hcd_name); ohci_init_driver(&ohci_pci_hc_driver, &pci_overrides); + + /* Entries for the PCI suspend/resume callbacks are special */ + ohci_pci_hc_driver.pci_suspend = ohci_suspend; + ohci_pci_hc_driver.pci_resume = ohci_resume; + return pci_register_driver(&ohci_pci_driver); } module_init(ohci_pci_init); -- cgit v0.10.2 From 52d5b9aba1f5790ca3231c262979c2c3e26dd99b Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Wed, 21 Aug 2013 17:43:31 +0200 Subject: usb: phy: fix build breakage Commit 94ae9843 (usb: phy: rename all phy drivers to phy-$name-usb.c) renamed drivers/usb/phy/otg_fsm.h to drivers/usb/phy/phy-fsm-usb.h but changed drivers/usb/phy/phy-fsm-usb.c to include not existing "phy-otg-fsm.h" instead of new "phy-fsm-usb.h". This breaks building: ... drivers/usb/phy/phy-fsm-usb.c:32:25: fatal error: phy-otg-fsm.h: No such file or directory compilation terminated. make[3]: *** [drivers/usb/phy/phy-fsm-usb.o] Error 1 This commit also missed to modify drivers/usb/phy/phy-fsl-usb.h to include new "phy-fsm-usb.h" instead of "otg_fsm.h" resulting in another build breakage: ... In file included from drivers/usb/phy/phy-fsl-usb.c:46:0: drivers/usb/phy/phy-fsl-usb.h:18:21: fatal error: otg_fsm.h: No such file or directory compilation terminated. make[3]: *** [drivers/usb/phy/phy-fsl-usb.o] Error 1 Fix both issues. Signed-off-by: Anatolij Gustschin Cc: stable # 3.10+ Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/phy/phy-fsl-usb.h b/drivers/usb/phy/phy-fsl-usb.h index ca26628..e1859b8 100644 --- a/drivers/usb/phy/phy-fsl-usb.h +++ b/drivers/usb/phy/phy-fsl-usb.h @@ -15,7 +15,7 @@ * 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include "otg_fsm.h" +#include "phy-fsm-usb.h" #include #include diff --git a/drivers/usb/phy/phy-fsm-usb.c b/drivers/usb/phy/phy-fsm-usb.c index c520b35..7f45966 100644 --- a/drivers/usb/phy/phy-fsm-usb.c +++ b/drivers/usb/phy/phy-fsm-usb.c @@ -29,7 +29,7 @@ #include #include -#include "phy-otg-fsm.h" +#include "phy-fsm-usb.h" /* Change USB protocol when there is a protocol change */ static int otg_set_protocol(struct otg_fsm *fsm, int protocol) -- cgit v0.10.2 From 118b23022512eb2f41ce42db70dc0568d00be4ba Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 24 Aug 2013 12:08:17 -0400 Subject: cope with potentially long ->d_dname() output for shmem/hugetlb dynamic_dname() is both too much and too little for those - the output may be well in excess of 64 bytes dynamic_dname() assumes to be enough (thanks to ashmem feeding really long names to shmem_file_setup()) and vsnprintf() is an overkill for those guys. 
Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 87bdb53..83cfb83 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2724,6 +2724,17 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, return memcpy(buffer, temp, sz); } +char *simple_dname(struct dentry *dentry, char *buffer, int buflen) +{ + char *end = buffer + buflen; + /* these dentries are never renamed, so d_lock is not needed */ + if (prepend(&end, &buflen, " (deleted)", 11) || + prepend_name(&end, &buflen, &dentry->d_name) || + prepend(&end, &buflen, "/", 1)) + end = ERR_PTR(-ENAMETOOLONG); + return end; +} + /* * Write full pathname from the root of the filesystem into the buffer. */ diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 3442397..d19b30a 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -926,14 +926,8 @@ static int get_hstate_idx(int page_size_log) return h - hstates; } -static char *hugetlb_dname(struct dentry *dentry, char *buffer, int buflen) -{ - return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)", - dentry->d_name.name); -} - static struct dentry_operations anon_ops = { - .d_dname = hugetlb_dname + .d_dname = simple_dname }; /* diff --git a/include/linux/dcache.h b/include/linux/dcache.h index b90337c..4a12532 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -336,6 +336,7 @@ extern int d_validate(struct dentry *, struct dentry *); * helper function for dentry_operations.d_dname() members */ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); +extern char *simple_dname(struct dentry *, char *, int); extern char *__d_path(const struct path *, const struct path *, char *, int); extern char *d_absolute_path(const struct path *, char *, int); diff --git a/mm/shmem.c b/mm/shmem.c index 8335dbd..e43dc55 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2909,14 +2909,8 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range); /* common code */ -static char *shmem_dname(struct dentry *dentry, char *buffer, int buflen) -{ - return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)", - dentry->d_name.name); -} - static struct dentry_operations anon_ops = { - .d_dname = shmem_dname + .d_dname = simple_dname }; /** -- cgit v0.10.2 From a5a1955e0c2d4d325fabdf6b09aa3f9d33e78a10 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 24 Aug 2013 17:14:34 +0200 Subject: proc: kill the extra proc_readfd_common()->dir_emit_dots() proc_readfd_common() does dir_emit_dots() twice in a row, we need to do this only once. Signed-off-by: Oleg Nesterov Signed-off-by: Al Viro diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 75f2890..0ff80f9 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -230,8 +230,6 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, if (!dir_emit_dots(file, ctx)) goto out; - if (!dir_emit_dots(file, ctx)) - goto out; files = get_files_struct(p); if (!files) goto out; -- cgit v0.10.2 From 136eefa48d638cd8e57e8e2f0087f59c76863bee Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Aug 2013 12:49:40 +0300 Subject: efs: iget_locked() doesn't return an ERR_PTR() The iget_locked() function returns NULL on error and never an ERR_PTR. 
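So the usual calling pattern is (generic sketch, not specific to efs):

	inode = iget_locked(sb, ino);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;		/* already in cache and initialised */
	/* ... read the on-disk inode and fill in the fields ... */
	unlock_new_inode(inode);
	return inode;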
Signed-off-by: Dan Carpenter Signed-off-by: Al Viro diff --git a/fs/efs/inode.c b/fs/efs/inode.c index f3913eb..d15ccf2 100644 --- a/fs/efs/inode.c +++ b/fs/efs/inode.c @@ -57,7 +57,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino) struct inode *inode; inode = iget_locked(super, ino); - if (IS_ERR(inode)) + if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; -- cgit v0.10.2 From 821ff77c6c524c40667e01483fc72428b58810b4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Aug 2013 12:51:03 +0300 Subject: bfs: iget_locked() doesn't return an ERR_PTR iget_locked() returns a NULL on error, it doesn't return an ERR_PTR. Signed-off-by: Dan Carpenter Signed-off-by: Al Viro diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 5e376bb..8defc6b 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -40,7 +40,7 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino) int block, off; inode = iget_locked(sb, ino); - if (IS_ERR(inode)) + if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; -- cgit v0.10.2 From 52e220d357a38cb29fa2e29f34ed94c1d66357f4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Aug 2013 12:44:39 +0300 Subject: VFS: collect_mounts() should return an ERR_PTR This should actually be returning an ERR_PTR on error instead of NULL. That was how it was designed and all the callers expect it. [AV: actually, that's what "VFS: Make clone_mnt()/copy_tree()/collect_mounts() return errors" missed - originally collect_mounts() was expected to return NULL on failure] Cc: # 3.10+ Signed-off-by: Dan Carpenter Signed-off-by: Al Viro diff --git a/fs/namespace.c b/fs/namespace.c index 7b1ca9b..a45ba4f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1429,7 +1429,7 @@ struct vfsmount *collect_mounts(struct path *path) CL_COPY_ALL | CL_PRIVATE); namespace_unlock(); if (IS_ERR(tree)) - return NULL; + return ERR_CAST(tree); return &tree->mnt; } -- cgit v0.10.2 From b0f55f2a1a295c364be012e82dbab079a2454006 Mon Sep 17 00:00:00 2001 From: Joern Rennecke Date: Sat, 24 Aug 2013 12:03:06 +0530 Subject: ARC: [lib] strchr breakage in Big-endian configuration For a search buffer, 2 byte aligned, strchr() was returning pointer outside of buffer (buf - 1) ------------->8---------------- // Input buffer (default 4 byte aigned) char *buffer = "1AA_"; // actual search start (to mimick 2 byte alignment) char *current_line = &(buffer[2]); // Character to search for char c = 'A'; char *c_pos = strchr(current_line, c); printf("%s\n", c_pos) --> 'AA_' as oppose to 'A_' ------------->8---------------- Reported-by: Anton Kolesov Debugged-by: Anton Kolesov Cc: # [3.9 and 3.10] Cc: Noam Camus Signed-off-by: Joern Rennecke Signed-off-by: Vineet Gupta Signed-off-by: Linus Torvalds diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S index 99c1047..9c548c7 100644 --- a/arch/arc/lib/strchr-700.S +++ b/arch/arc/lib/strchr-700.S @@ -39,9 +39,18 @@ ARC_ENTRY strchr ld.a r2,[r0,4] sub r12,r6,r7 bic r12,r12,r6 +#ifdef __LITTLE_ENDIAN__ and r7,r12,r4 breq r7,0,.Loop ; For speed, we want this branch to be unaligned. b .Lfound_char ; Likewise this one. +#else + and r12,r12,r4 + breq r12,0,.Loop ; For speed, we want this branch to be unaligned. + lsr_s r12,r12,7 + bic r2,r7,r6 + b.d .Lfound_char_b + and_s r2,r2,r12 +#endif ; /* We require this code address to be unaligned for speed... 
*/ .Laligned: ld_s r2,[r0] @@ -95,6 +104,7 @@ ARC_ENTRY strchr lsr r7,r7,7 bic r2,r7,r6 +.Lfound_char_b: norm r2,r2 sub_s r0,r0,4 asr_s r2,r2,3 -- cgit v0.10.2 From f41a64eeabbb2daa70f2b39d0dfb59c91904a1b0 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Wed, 21 Aug 2013 09:14:49 -0700 Subject: Input: wacom - add support for 0x300 and 0x301 Tested-by: Arjuna Rao Chavala Signed-off-by: Ping Cheng Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 384fbcd..f3e91f0 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -2112,7 +2112,7 @@ static const struct wacom_features wacom_features_0xDA = { "Wacom Bamboo 2FG 4x5 SE", WACOM_PKGLEN_BBFUN, 14720, 9200, 1023, 31, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, .touch_max = 2 }; -static struct wacom_features wacom_features_0xDB = +static const struct wacom_features wacom_features_0xDB = { "Wacom Bamboo 2FG 6x8 SE", WACOM_PKGLEN_BBFUN, 21648, 13700, 1023, 31, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, .touch_max = 2 }; @@ -2127,6 +2127,12 @@ static const struct wacom_features wacom_features_0xDF = { "Wacom Bamboo 16FG 6x8", WACOM_PKGLEN_BBPEN, 21648, 13700, 1023, 31, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, .touch_max = 16 }; +static const struct wacom_features wacom_features_0x300 = + { "Wacom Bamboo One S", WACOM_PKGLEN_BBPEN, 14720, 9225, 1023, + 31, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; +static const struct wacom_features wacom_features_0x301 = + { "Wacom Bamboo One M", WACOM_PKGLEN_BBPEN, 21648, 13530, 1023, + 31, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0x6004 = { "ISD-V4", WACOM_PKGLEN_GRAPHIRE, 12800, 8000, 255, 0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -2253,6 +2259,8 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x100) }, { USB_DEVICE_WACOM(0x101) }, { USB_DEVICE_WACOM(0x10D) }, + { USB_DEVICE_WACOM(0x300) }, + { USB_DEVICE_WACOM(0x301) }, { USB_DEVICE_WACOM(0x304) }, { USB_DEVICE_WACOM(0x4001) }, { USB_DEVICE_WACOM(0x47) }, -- cgit v0.10.2 From acc0444bf83ea559fa84b527b1f49cf20b0df253 Mon Sep 17 00:00:00 2001 From: Matteo Delfino Date: Thu, 15 Aug 2013 00:09:41 -0700 Subject: Input: elantech - fix packet check for v3 and v4 hardware The signatures of v3 and v4 packets change depending on the value of a hardware flag called 'crc_enabled'. The packet type detection must change accordingly. This patch also restores a consistency check for v4 packets inadvertently removed by commit: 9eebed7de660c0b5ab129a9de4f89d20b60de68c Input: elantech - fix for newer hardware versions (v7) A note about the naming convention: v3 hardware is associated with IC body v5 while v4 hardware is associated with IC body v6 and v7. The above commit refers to IC body v7, not to v7 hardware. 
Tested on Samsung NP730U3E (fw = 0x675f05, ICv7, crc_enabled = 1) Tested-by: Giovanni Frigione Signed-off-by: Matteo Delfino Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 57b2637..8551dca 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -672,6 +672,7 @@ static int elantech_packet_check_v2(struct psmouse *psmouse) */ static int elantech_packet_check_v3(struct psmouse *psmouse) { + struct elantech_data *etd = psmouse->private; const u8 debounce_packet[] = { 0xc4, 0xff, 0xff, 0x02, 0xff, 0xff }; unsigned char *packet = psmouse->packet; @@ -682,19 +683,48 @@ static int elantech_packet_check_v3(struct psmouse *psmouse) if (!memcmp(packet, debounce_packet, sizeof(debounce_packet))) return PACKET_DEBOUNCE; - if ((packet[0] & 0x0c) == 0x04 && (packet[3] & 0xcf) == 0x02) - return PACKET_V3_HEAD; + /* + * If the hardware flag 'crc_enabled' is set the packets have + * different signatures. + */ + if (etd->crc_enabled) { + if ((packet[3] & 0x09) == 0x08) + return PACKET_V3_HEAD; + + if ((packet[3] & 0x09) == 0x09) + return PACKET_V3_TAIL; + } else { + if ((packet[0] & 0x0c) == 0x04 && (packet[3] & 0xcf) == 0x02) + return PACKET_V3_HEAD; - if ((packet[0] & 0x0c) == 0x0c && (packet[3] & 0xce) == 0x0c) - return PACKET_V3_TAIL; + if ((packet[0] & 0x0c) == 0x0c && (packet[3] & 0xce) == 0x0c) + return PACKET_V3_TAIL; + } return PACKET_UNKNOWN; } static int elantech_packet_check_v4(struct psmouse *psmouse) { + struct elantech_data *etd = psmouse->private; unsigned char *packet = psmouse->packet; unsigned char packet_type = packet[3] & 0x03; + bool sanity_check; + + /* + * Sanity check based on the constant bits of a packet. + * The constant bits change depending on the value of + * the hardware flag 'crc_enabled' but are the same for + * every packet, regardless of the type. + */ + if (etd->crc_enabled) + sanity_check = ((packet[3] & 0x08) == 0x00); + else + sanity_check = ((packet[0] & 0x0c) == 0x04 && + (packet[3] & 0x1c) == 0x10); + + if (!sanity_check) + return PACKET_UNKNOWN; switch (packet_type) { case 0: @@ -1313,6 +1343,12 @@ static int elantech_set_properties(struct elantech_data *etd) etd->reports_pressure = true; } + /* + * The signatures of v3 and v4 packets change depending on the + * value of this hardware flag. 
+ */ + etd->crc_enabled = ((etd->fw_version & 0x4000) == 0x4000); + return 0; } diff --git a/drivers/input/mouse/elantech.h b/drivers/input/mouse/elantech.h index 46db3be..036a04a 100644 --- a/drivers/input/mouse/elantech.h +++ b/drivers/input/mouse/elantech.h @@ -129,6 +129,7 @@ struct elantech_data { bool paritycheck; bool jumpy_cursor; bool reports_pressure; + bool crc_enabled; unsigned char hw_version; unsigned int fw_version; unsigned int single_finger_reports; -- cgit v0.10.2 From d8dfad3876e4386666b759da3c833d62fb8b2267 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Aug 2013 17:43:22 -0700 Subject: Linux 3.11-rc7 diff --git a/Makefile b/Makefile index a5a55f4..369882e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Linux for Workgroups # *DOCUMENTATION* -- cgit v0.10.2 From ed06349fe8d12dcb718984862b6e839fc8606c34 Mon Sep 17 00:00:00 2001 From: Mag Date: Mon, 26 Aug 2013 00:22:01 -0700 Subject: Input: xpad - add signature for Razer Onza Classic Edition Signed-off-by: Nol "Mag" Archinova Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index fa061d4..75e3b10 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -167,6 +167,7 @@ static const struct xpad_device { { 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd00, "Razer Onza Tournament Edition", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, + { 0x1689, 0xfd01, "Razer Onza Classic Edition", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf016, "Mad Catz Xbox 360 Controller", 0, XTYPE_XBOX360 }, -- cgit v0.10.2 From 5a25cf1e310888eb333f9e034be84a8117111d30 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 26 Aug 2013 12:31:19 +0200 Subject: xfrm: revert ipv4 mtu determination to dst_mtu In commit 0ea9d5e3e0e03a63b11392f5613378977dae7eca ("xfrm: introduce helper for safe determination of mtu") I switched the determination of ipv4 mtus from dst_mtu to ip_skb_dst_mtu. This was an error because in case of IP_PMTUDISC_PROBE we fall back to the interface mtu, which is never correct for ipv4 ipsec. This patch partly reverts 0ea9d5e3e0e03a63b11392f5613378977dae7eca ("xfrm: introduce helper for safe determination of mtu"). 
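The distinction is that ip_skb_dst_mtu() deliberately falls back to the device MTU for sockets using IP_PMTUDISC_PROBE, roughly (paraphrased, not the exact header code):

	/* ip_skb_dst_mtu(), simplified */
	if (skb->sk && inet_sk(skb->sk)->pmtudisc == IP_PMTUDISC_PROBE)
		return skb_dst(skb)->dev->mtu;	/* interface MTU */
	return dst_mtu(skb_dst(skb));		/* route MTU */

For IPsec-encapsulated IPv4 traffic only the route MTU is correct, hence the return to plain dst_mtu() in xfrm4_tunnel_check_size() below.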
Cc: Steffen Klassert Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ac5b025..e823786 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -1724,15 +1723,4 @@ static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m) return ret; } -static inline int xfrm_skb_dst_mtu(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - if (sk && skb->protocol == htons(ETH_P_IPV6)) - return ip6_skb_dst_mtu(skb); - else if (sk && skb->protocol == htons(ETH_P_IP)) - return ip_skb_dst_mtu(skb); - return dst_mtu(skb_dst(skb)); -} - #endif /* _NET_XFRM_H */ diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 80baf4a..baa0f63 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -28,7 +28,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) goto out; - mtu = xfrm_skb_dst_mtu(skb); + mtu = dst_mtu(skb_dst(skb)); if (skb->len > mtu) { if (skb->sk) xfrm_local_error(skb, mtu); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index e092e30..6cd625e 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -140,10 +140,12 @@ static int __xfrm6_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; - int mtu = xfrm_skb_dst_mtu(skb); + int mtu; - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; + if (skb->protocol == htons(ETH_P_IPV6)) + mtu = ip6_skb_dst_mtu(skb); + else + mtu = dst_mtu(skb_dst(skb)); if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); -- cgit v0.10.2 From 9c9c9ad5fae7e9ef56a38acb508a01919b225e9a Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 26 Aug 2013 12:31:23 +0200 Subject: ipv6: set skb->protocol on tcp, raw and ip6_append_data genereated skbs Currently we don't initialize skb->protocol when transmitting data via tcp, raw(with and without inclhdr) or udp+ufo or appending data directly to the socket transmit queue (via ip6_append_data). This needs to be done so that we can get the correct mtu in the xfrm layer. Setting of skb->protocol happens only in functions where we also have a transmitting socket and a new skb, so we don't overwrite old values. 
Cc: Steffen Klassert Cc: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6e3ddf8..e7ceb6c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -238,6 +238,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, hdr->saddr = fl6->saddr; hdr->daddr = *first_hop; + skb->protocol = htons(ETH_P_IPV6); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; @@ -1057,6 +1058,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, /* initialize protocol header pointer */ skb->transport_header = skb->network_header + fragheaderlen; + skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; } @@ -1359,6 +1361,7 @@ alloc_new_skb: /* * Fill in the control structures */ + skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = CHECKSUM_NONE; skb->csum = 0; /* reserve for fragmentation and ipsec header */ diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index c45f7a5..cdaed47 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -628,6 +628,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, goto error; skb_reserve(skb, hlen); + skb->protocol = htons(ETH_P_IPV6); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; skb_dst_set(skb, &rt->dst); -- cgit v0.10.2 From dd5746bf6b48bb837e9f5af14b9b241fc4fdc1ef Mon Sep 17 00:00:00 2001 From: Sarveshwar Bandi Date: Fri, 23 Aug 2013 14:59:33 +0530 Subject: be2net: Check for POST state in suspend-resume sequence In suspend-resume sequence, the OS could attempt to initialize the controller before it is ready, check for POST state before going ahead. Signed-off-by: Sarveshwar Bandi Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 4559c35..3d91a5e 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4373,6 +4373,10 @@ static int be_resume(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); + status = be_fw_wait_ready(adapter); + if (status) + return status; + /* tell fw we're ready to fire cmds */ status = be_cmd_fw_init(adapter); if (status) -- cgit v0.10.2 From d3474049ab6cfcf14274f5ab9f20c8f50b083eab Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 26 Aug 2013 10:53:53 -0400 Subject: USB: OHCI: fix build error related to ohci_suspend/resume Commit 9a11899c5e69 (USB: OHCI: add missing PCI PM callbacks to ohci-pci.c) added missing ohci_suspend and ohci_resume callback pointers, but forgot that these callbacks are declared and defined only when CONFIG_PM is enabled. This patch adds a preprocessor conditional to avoid build errors when PM is disabled. 
Reported-by: Guenter Roeck Tested-by: Guenter Roeck Reported-by: Meelis Roos , Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index 0f1d193..279b049 100644 --- a/drivers/usb/host/ohci-pci.c +++ b/drivers/usb/host/ohci-pci.c @@ -305,9 +305,11 @@ static int __init ohci_pci_init(void) ohci_init_driver(&ohci_pci_hc_driver, &pci_overrides); +#ifdef CONFIG_PM /* Entries for the PCI suspend/resume callbacks are special */ ohci_pci_hc_driver.pci_suspend = ohci_suspend; ohci_pci_hc_driver.pci_resume = ohci_resume; +#endif return pci_register_driver(&ohci_pci_driver); } -- cgit v0.10.2 From f5f6cbb61610b7bf9d9d96db9c3979d62a424bab Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 27 Aug 2013 16:38:33 +1000 Subject: powerpc: Don't Oops when accessing /proc/powerpc/lparcfg without hypervisor /proc/powerpc/lparcfg is an ancient facility (though still actively used) which allows access to some informations relative to the partition when running underneath a PAPR compliant hypervisor. It makes no sense on non-pseries machines. However, currently, not only can it be created on these if the kernel has pseries support, but accessing it on such a machine will crash due to trying to do hypervisor calls. In fact, it should also not do HV calls on older pseries that didn't have an hypervisor either. Finally, it has the plumbing to be a module but is a "bool" Kconfig option. This fixes the whole lot by turning it into a machine_device_initcall that is only created on pseries, and adding the necessary hypervisor check before calling the H_GET_EM_PARMS hypercall CC: Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index d92f387..e2a0a16 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -35,7 +35,13 @@ #include #include #include +#include + +/* + * This isn't a module but we expose that to userspace + * via /proc so leave the definitions here + */ #define MODULE_VERS "1.9" #define MODULE_NAME "lparcfg" @@ -418,7 +424,8 @@ static void parse_em_data(struct seq_file *m) { unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - if (plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS) + if (firmware_has_feature(FW_FEATURE_LPAR) && + plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS) seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]); } @@ -677,7 +684,6 @@ static int lparcfg_open(struct inode *inode, struct file *file) } static const struct file_operations lparcfg_fops = { - .owner = THIS_MODULE, .read = seq_read, .write = lparcfg_write, .open = lparcfg_open, @@ -699,14 +705,4 @@ static int __init lparcfg_init(void) } return 0; } - -static void __exit lparcfg_cleanup(void) -{ - remove_proc_subtree("powerpc/lparcfg", NULL); -} - -module_init(lparcfg_init); -module_exit(lparcfg_cleanup); -MODULE_DESCRIPTION("Interface for LPAR configuration data"); -MODULE_AUTHOR("Dave Engebretsen"); -MODULE_LICENSE("GPL"); +machine_device_initcall(pseries, lparcfg_init); -- cgit v0.10.2 From bdbc29c19b2633b1d9c52638fb732bcde7a2031a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 27 Aug 2013 16:07:49 +1000 Subject: powerpc: Work around gcc miscompilation of __pa() on 64-bit On 64-bit, __pa(&static_var) gets miscompiled by recent versions of gcc as something like: addis 3,2,.LANCHOR1+4611686018427387904@toc@ha addi 3,3,.LANCHOR1+4611686018427387904@toc@l This ends up effectively ignoring the offset, since its bottom 32 bits are zero, and means that the 
result of __pa() still has 0xC in the top nibble. This happens with gcc 4.8.1, at least. To work around this, for 64-bit we make __pa() use an AND operator, and for symmetry, we make __va() use an OR operator. Using an AND operator rather than a subtraction ends up with slightly shorter code since it can be done with a single clrldi instruction, whereas it takes three instructions to form the constant (-PAGE_OFFSET) and add it on. (Note that MEMORY_START is always 0 on 64-bit.) CC: Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index dbd9d3c..9cf59816d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -979,6 +979,7 @@ config RELOCATABLE must live at a different physical address than the primary kernel. +# This value must have zeroes in the bottom 60 bits otherwise lots will break config PAGE_OFFSET hex default "0xc000000000000000" diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 988c812..b9f4262 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -211,9 +211,19 @@ extern long long virt_phys_offset; #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET)) #define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET) #else +#ifdef CONFIG_PPC64 +/* + * gcc miscompiles (unsigned long)(&static_var) - PAGE_OFFSET + * with -mcmodel=medium, so we use & and | instead of - and + on 64-bit. + */ +#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET)) +#define __pa(x) ((unsigned long)(x) & 0x0fffffffffffffffUL) + +#else /* 32-bit, non book E */ #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + PAGE_OFFSET - MEMORY_START)) #define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + MEMORY_START) #endif +#endif /* * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI, -- cgit v0.10.2 From d220980b701d838560a70de691b53be007e99e78 Mon Sep 17 00:00:00 2001 From: Eugene Surovegin Date: Mon, 26 Aug 2013 11:53:32 -0700 Subject: powerpc/hvsi: Increase handshake timeout from 200ms to 400ms. This solves a problem observed in kexec'ed kernel where 200ms timeout is too short and bootconsole fails to initialize. Console did eventually become workable but much later into the boot process. Observed timeout was around 260ms, but I decided to make it a little bigger for more reliability. This has been tested on Power7 machine with Petitboot as a primary bootloader and PowerNV firmware. CC: Signed-off-by: Eugene Surovegin Signed-off-by: Benjamin Herrenschmidt diff --git a/drivers/tty/hvc/hvsi_lib.c b/drivers/tty/hvc/hvsi_lib.c index 3396eb9..ac27671 100644 --- a/drivers/tty/hvc/hvsi_lib.c +++ b/drivers/tty/hvc/hvsi_lib.c @@ -341,8 +341,8 @@ void hvsilib_establish(struct hvsi_priv *pv) pr_devel("HVSI@%x: ... 
waiting handshake\n", pv->termno); - /* Try for up to 200s */ - for (timeout = 0; timeout < 20; timeout++) { + /* Try for up to 400ms */ + for (timeout = 0; timeout < 40; timeout++) { if (pv->established) goto established; if (!hvsi_get_packet(pv)) -- cgit v0.10.2 From 6dec97dc92946eb479e6ebb54a61f8226cceefec Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 27 Aug 2013 12:37:18 +0400 Subject: mm: move_ptes -- Set soft dirty bit depending on pte type Dave reported corrupted swap entries | [ 4588.541886] swap_free: Unused swap offset entry 00002d15 | [ 4588.541952] BUG: Bad page map in process trinity-kid12 pte:005a2a80 pmd:22c01f067 and Hugh pointed that in move_ptes _PAGE_SOFT_DIRTY bit set regardless the type of entry pte consists of. The trick here is that when we carry soft dirty status in swap entries we are to use _PAGE_SWP_SOFT_DIRTY instead, because this is the only place in pte which can be used for own needs without intersecting with bits owned by swap entry type/offset. Reported-and-tested-by: Dave Jones Signed-off-by: Cyrill Gorcunov Cc: Pavel Emelyanov Analyzed-by: Hugh Dickins Cc: Hillf Danton Cc: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/mremap.c b/mm/mremap.c index 457d34e..0843feb 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -69,6 +70,23 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma, return pmd; } +static pte_t move_soft_dirty_pte(pte_t pte) +{ + /* + * Set soft dirty bit so we can notice + * in userspace the ptes were moved. + */ +#ifdef CONFIG_MEM_SOFT_DIRTY + if (pte_present(pte)) + pte = pte_mksoft_dirty(pte); + else if (is_swap_pte(pte)) + pte = pte_swp_mksoft_dirty(pte); + else if (pte_file(pte)) + pte = pte_file_mksoft_dirty(pte); +#endif + return pte; +} + static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, unsigned long old_addr, unsigned long old_end, struct vm_area_struct *new_vma, pmd_t *new_pmd, @@ -126,7 +144,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, continue; pte = ptep_get_and_clear(mm, old_addr, old_pte); pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); - set_pte_at(mm, new_addr, new_pte, pte_mksoft_dirty(pte)); + pte = move_soft_dirty_pte(pte); + set_pte_at(mm, new_addr, new_pte, pte); } arch_leave_lazy_mmu_mode(); -- cgit v0.10.2 From d661684cf6820331feae71146c35da83d794467e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 22 Aug 2013 11:39:15 -0700 Subject: net: Check the correct namespace when spoofing pid over SCM_RIGHTS This is a security bug. The follow-up will fix nsproxy to discourage this type of issue from happening again. Cc: stable@vger.kernel.org Signed-off-by: Andy Lutomirski Reviewed-by: "Eric W. Biederman" Signed-off-by: David S. 
Miller diff --git a/net/core/scm.c b/net/core/scm.c index 03795d0..b4da80b 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -54,7 +54,7 @@ static __inline__ int scm_check_creds(struct ucred *creds) return -EINVAL; if ((creds->pid == task_tgid_vnr(current) || - ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && + ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || -- cgit v0.10.2 From c2b1df2eb42978073ec27c99cc199d20ae48b849 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 22 Aug 2013 11:39:16 -0700 Subject: Rename nsproxy.pid_ns to nsproxy.pid_ns_for_children nsproxy.pid_ns is *not* the task's pid namespace. The name should clarify that. This makes it more obvious that setns on a pid namespace is weird -- it won't change the pid namespace shown in procfs. Signed-off-by: Andy Lutomirski Reviewed-by: "Eric W. Biederman" Signed-off-by: David S. Miller diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 10e5947..b4ec59d 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -14,6 +14,10 @@ struct fs_struct; * A structure to contain pointers to all per-process * namespaces - fs (mount), uts, network, sysvipc, etc. * + * The pid namespace is an exception -- it's accessed using + * task_active_pid_ns. The pid namespace here is the + * namespace that children will use. + * * 'count' is the number of tasks holding a reference. * The count for each namespace, then, will be the number * of nsproxies pointing to it, not the number of tasks. @@ -27,7 +31,7 @@ struct nsproxy { struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; - struct pid_namespace *pid_ns; + struct pid_namespace *pid_ns_for_children; struct net *net_ns; }; extern struct nsproxy init_nsproxy; diff --git a/kernel/fork.c b/kernel/fork.c index e23bb19..bf46287 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1177,7 +1177,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, * don't allow the creation of threads. 
*/ if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) && - (task_active_pid_ns(current) != current->nsproxy->pid_ns)) + (task_active_pid_ns(current) != + current->nsproxy->pid_ns_for_children)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); @@ -1351,7 +1352,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (pid != &init_struct_pid) { retval = -ENOMEM; - pid = alloc_pid(p->nsproxy->pid_ns); + pid = alloc_pid(p->nsproxy->pid_ns_for_children); if (!pid) goto bad_fork_cleanup_io; } diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 364ceab..997cbb9 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -29,15 +29,15 @@ static struct kmem_cache *nsproxy_cachep; struct nsproxy init_nsproxy = { - .count = ATOMIC_INIT(1), - .uts_ns = &init_uts_ns, + .count = ATOMIC_INIT(1), + .uts_ns = &init_uts_ns, #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) - .ipc_ns = &init_ipc_ns, + .ipc_ns = &init_ipc_ns, #endif - .mnt_ns = NULL, - .pid_ns = &init_pid_ns, + .mnt_ns = NULL, + .pid_ns_for_children = &init_pid_ns, #ifdef CONFIG_NET - .net_ns = &init_net, + .net_ns = &init_net, #endif }; @@ -85,9 +85,10 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_ipc; } - new_nsp->pid_ns = copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns); - if (IS_ERR(new_nsp->pid_ns)) { - err = PTR_ERR(new_nsp->pid_ns); + new_nsp->pid_ns_for_children = + copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children); + if (IS_ERR(new_nsp->pid_ns_for_children)) { + err = PTR_ERR(new_nsp->pid_ns_for_children); goto out_pid; } @@ -100,8 +101,8 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, return new_nsp; out_net: - if (new_nsp->pid_ns) - put_pid_ns(new_nsp->pid_ns); + if (new_nsp->pid_ns_for_children) + put_pid_ns(new_nsp->pid_ns_for_children); out_pid: if (new_nsp->ipc_ns) put_ipc_ns(new_nsp->ipc_ns); @@ -174,8 +175,8 @@ void free_nsproxy(struct nsproxy *ns) put_uts_ns(ns->uts_ns); if (ns->ipc_ns) put_ipc_ns(ns->ipc_ns); - if (ns->pid_ns) - put_pid_ns(ns->pid_ns); + if (ns->pid_ns_for_children) + put_pid_ns(ns->pid_ns_for_children); put_net(ns->net_ns); kmem_cache_free(nsproxy_cachep, ns); } diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 6917e8e..601bb36 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -349,8 +349,8 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns) if (ancestor != active) return -EINVAL; - put_pid_ns(nsproxy->pid_ns); - nsproxy->pid_ns = get_pid_ns(new); + put_pid_ns(nsproxy->pid_ns_for_children); + nsproxy->pid_ns_for_children = get_pid_ns(new); return 0; } -- cgit v0.10.2 From 449cfcc1cd28a1c16321c47b1e7e77138411a72a Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 23 Aug 2013 15:40:53 +0200 Subject: jme: lower NAPI weight Since commit 82dc3c63 ("net: introduce NAPI_POLL_WEIGHT") netif_napi_add() produces an error message if a NAPI poll weight greater than 64 is requested. jme requests a quarter of the rx ring size as the NAPI weight. jme's rx ring size is 1 << 9 = 512. Use the standard NAPI weight. v2: proper reference to the related commit Signed-off-by: Michal Schmidt Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 7fbe6ab..23de82a 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -3069,7 +3069,7 @@ jme_init_one(struct pci_dev *pdev, jwrite32(jme, JME_APMC, apmc); } - NETIF_NAPI_SET(netdev, &jme->napi, jme_poll, jme->rx_ring_size >> 2) + NETIF_NAPI_SET(netdev, &jme->napi, jme_poll, NAPI_POLL_WEIGHT) spin_lock_init(&jme->phy_lock); spin_lock_init(&jme->macaddr_lock); -- cgit v0.10.2 From 1e4a5282b4791fb9ba68478ecddae9642f9cfefb Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 23 Aug 2013 15:41:09 +0200 Subject: netxen: lower NAPI weight Since commit 82dc3c63 ("net: introduce NAPI_POLL_WEIGHT") netif_napi_add() produces an error message if a NAPI poll weight greater than 64 is requested. Use the standard NAPI weight. v2: proper reference to the related commit Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h index 3fe09ab..32675e1 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h @@ -1171,7 +1171,6 @@ typedef struct { #define NETXEN_DB_MAPSIZE_BYTES 0x1000 -#define NETXEN_NETDEV_WEIGHT 128 #define NETXEN_ADAPTER_UP_MAGIC 777 #define NETXEN_NIC_PEG_TUNE 0 diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index c401b0b..ec4cf7f 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -197,7 +197,7 @@ netxen_napi_add(struct netxen_adapter *adapter, struct net_device *netdev) for (ring = 0; ring < adapter->max_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; netif_napi_add(netdev, &sds_ring->napi, - netxen_nic_poll, NETXEN_NETDEV_WEIGHT); + netxen_nic_poll, NAPI_POLL_WEIGHT); } return 0; -- cgit v0.10.2 From ae24f261e11ec30d343c7c5ee805ff29782f726b Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 23 Aug 2013 15:41:19 +0200 Subject: ps3_gelic: lower NAPI weight Since commit 82dc3c63 ("net: introduce NAPI_POLL_WEIGHT") netif_napi_add() produces an error message if a NAPI poll weight greater than 64 is requested. GELIC_NET_NAPI_WEIGHT is defined to GELIC_NET_RX_DESCRIPTORS, which is 128. Use the standard NAPI weight. v2: proper reference to the related commit Signed-off-by: Michal Schmidt Acked-by: Geoff Levand Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index ad32af6..9c805e0 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1466,8 +1466,7 @@ static void gelic_ether_setup_netdev_ops(struct net_device *netdev, { netdev->watchdog_timeo = GELIC_NET_WATCHDOG_TIMEOUT; /* NAPI */ - netif_napi_add(netdev, napi, - gelic_net_poll, GELIC_NET_NAPI_WEIGHT); + netif_napi_add(netdev, napi, gelic_net_poll, NAPI_POLL_WEIGHT); netdev->ethtool_ops = &gelic_ether_ethtool_ops; netdev->netdev_ops = &gelic_netdevice_ops; } diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h index a93df6a..309abb4 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h @@ -37,7 +37,6 @@ #define GELIC_NET_RXBUF_ALIGN 128 #define GELIC_CARD_RX_CSUM_DEFAULT 1 /* hw chksum */ #define GELIC_NET_WATCHDOG_TIMEOUT 5*HZ -#define GELIC_NET_NAPI_WEIGHT (GELIC_NET_RX_DESCRIPTORS) #define GELIC_NET_BROADCAST_ADDR 0xffffffffffffL #define GELIC_NET_MC_COUNT_MAX 32 /* multicast address list */ -- cgit v0.10.2 From c730b170456d9139c3c1f8a9c1f91837be657d60 Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Wed, 28 Aug 2013 01:13:00 +0300 Subject: bnx2x: vf mark stats started Solve issue where no stats were being collected for VF devices due to missing configuration in the stats' atomic synchronization mechanism. Signed-off-by: Ariel Elior Signed-off-by: Yuval Mintz Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c index d63d132..fed9b15 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c @@ -522,20 +522,16 @@ static void bnx2x_func_stats_init(struct bnx2x *bp) /* should be called under stats_sema */ static void __bnx2x_stats_start(struct bnx2x *bp) { - /* vfs travel through here as part of the statistics FSM, but no action - * is required - */ - if (IS_VF(bp)) - return; - - if (bp->port.pmf) - bnx2x_port_stats_init(bp); + if (IS_PF(bp)) { + if (bp->port.pmf) + bnx2x_port_stats_init(bp); - else if (bp->func_stx) - bnx2x_func_stats_init(bp); + else if (bp->func_stx) + bnx2x_func_stats_init(bp); - bnx2x_hw_stats_post(bp); - bnx2x_storm_stats_post(bp); + bnx2x_hw_stats_post(bp); + bnx2x_storm_stats_post(bp); + } bp->stats_started = true; } -- cgit v0.10.2 From 34d5626afc39c43d63ec7781b648091e92fae45a Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 28 Aug 2013 01:13:01 +0300 Subject: bnx2x: Fix move FP memory deallocations If driver will fail to allocate all queues, it will shrink the number of queues and move the storage queue to its correct place (i.e., the last queue among the newly supported number). When changing the pointers of the new location of the FCoE queue, we need to pay special attention to the aggregations pointer - that memory is allocated during probe and released upon driver removal. Current implementation has 2 pointers pointing to the same chunk of allocated memory, meaning upon removal there will be two kfree() of the same chunk while the other won't be released. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index f2d1ff1..26b4dfc 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -53,6 +53,7 @@ static inline void bnx2x_move_fp(struct bnx2x *bp, int from, int to) struct bnx2x_fp_stats *to_fp_stats = &bp->fp_stats[to]; int old_max_eth_txqs, new_max_eth_txqs; int old_txdata_index = 0, new_txdata_index = 0; + struct bnx2x_agg_info *old_tpa_info = to_fp->tpa_info; /* Copy the NAPI object as it has been already initialized */ from_fp->napi = to_fp->napi; @@ -61,6 +62,11 @@ static inline void bnx2x_move_fp(struct bnx2x *bp, int from, int to) memcpy(to_fp, from_fp, sizeof(*to_fp)); to_fp->index = to; + /* Retain the tpa_info of the original `to' version as we don't want + * 2 FPs to contain the same tpa_info pointer. + */ + to_fp->tpa_info = old_tpa_info; + /* move sp_objs contents as well, as their indices match fp ones */ memcpy(to_sp_objs, from_sp_objs, sizeof(*to_sp_objs)); -- cgit v0.10.2 From 35a04aa35c2929f24c7f063f42b6d776ad848c24 Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Wed, 28 Aug 2013 01:13:02 +0300 Subject: bnx2x: Fix functionality of configuring vlan list The check on return code of bnx2x_vfop_config_vlan0() would lead to error handling flow as the return value indicating an existing pending ramrod would be erroneously considered as an error. Signed-off-by: Ariel Elior Signed-off-by: Yuval Mintz Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index ad83f4b..b7efe27 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -522,23 +522,6 @@ static int bnx2x_vfop_set_user_req(struct bnx2x *bp, return 0; } -static int -bnx2x_vfop_config_vlan0(struct bnx2x *bp, - struct bnx2x_vlan_mac_ramrod_params *vlan_mac, - bool add) -{ - int rc; - - vlan_mac->user_req.cmd = add ? BNX2X_VLAN_MAC_ADD : - BNX2X_VLAN_MAC_DEL; - vlan_mac->user_req.u.vlan.vlan = 0; - - rc = bnx2x_config_vlan_mac(bp, vlan_mac); - if (rc == -EEXIST) - rc = 0; - return rc; -} - static int bnx2x_vfop_config_list(struct bnx2x *bp, struct bnx2x_vfop_filters *filters, struct bnx2x_vlan_mac_ramrod_params *vlan_mac) @@ -643,30 +626,14 @@ static void bnx2x_vfop_vlan_mac(struct bnx2x *bp, struct bnx2x_virtf *vf) case BNX2X_VFOP_VLAN_CONFIG_LIST: /* next state */ - vfop->state = BNX2X_VFOP_VLAN_CONFIG_LIST_0; - - /* remove vlan0 - could be no-op */ - vfop->rc = bnx2x_vfop_config_vlan0(bp, vlan_mac, false); - if (vfop->rc) - goto op_err; + vfop->state = BNX2X_VFOP_VLAN_MAC_CHK_DONE; - /* Do vlan list config. 
if this operation fails we try to - * restore vlan0 to keep the queue is working order - */ + /* do list config */ vfop->rc = bnx2x_vfop_config_list(bp, filters, vlan_mac); if (!vfop->rc) { set_bit(RAMROD_CONT, &vlan_mac->ramrod_flags); vfop->rc = bnx2x_config_vlan_mac(bp, vlan_mac); } - bnx2x_vfop_finalize(vf, vfop->rc, VFOP_CONT); /* fall-through */ - - case BNX2X_VFOP_VLAN_CONFIG_LIST_0: - /* next state */ - vfop->state = BNX2X_VFOP_VLAN_MAC_CHK_DONE; - - if (list_empty(&obj->head)) - /* add vlan0 */ - vfop->rc = bnx2x_vfop_config_vlan0(bp, vlan_mac, true); bnx2x_vfop_finalize(vf, vfop->rc, VFOP_DONE); default: -- cgit v0.10.2 From b4cddbd6dd9b3b9e08c26d8b7247e4e011092117 Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Wed, 28 Aug 2013 01:13:03 +0300 Subject: bnx2x: Fix VF memory leak unload Due to incorrect VF/PF conditions, when unloading a VF it will not release part of the memory it has previously allocated. Signed-off-by: Ariel Elior Signed-off-by: Yuval Mintz Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 26b4dfc..0cc2611 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2962,8 +2962,9 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) if (IS_PF(bp)) { if (CNIC_LOADED(bp)) bnx2x_free_mem_cnic(bp); - bnx2x_free_mem(bp); } + bnx2x_free_mem(bp); + bp->state = BNX2X_STATE_CLOSED; bp->cnic_loaded = false; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 8bdc8b9..1627a4e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -7855,12 +7855,15 @@ void bnx2x_free_mem(struct bnx2x *bp) { int i; - BNX2X_PCI_FREE(bp->def_status_blk, bp->def_status_blk_mapping, - sizeof(struct host_sp_status_block)); - BNX2X_PCI_FREE(bp->fw_stats, bp->fw_stats_mapping, bp->fw_stats_data_sz + bp->fw_stats_req_sz); + if (IS_VF(bp)) + return; + + BNX2X_PCI_FREE(bp->def_status_blk, bp->def_status_blk_mapping, + sizeof(struct host_sp_status_block)); + BNX2X_PCI_FREE(bp->slowpath, bp->slowpath_mapping, sizeof(struct bnx2x_slowpath)); -- cgit v0.10.2 From a3097bda78c7fb41fd3091ffb70bf7bd946e6997 Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Wed, 28 Aug 2013 01:13:04 +0300 Subject: bnx2x: Fix VF stats sync Since the PF gathers statistics for the VF, when the VF is about to unload we must synchronize the release of its statistics buffer with the PF, so that no DMA operation will be made to that address after the buffer release. Signed-off-by: Ariel Elior Signed-off-by: Yuval Mintz Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index b7efe27..e8706e1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2786,6 +2786,18 @@ int bnx2x_vf_init(struct bnx2x *bp, struct bnx2x_virtf *vf, dma_addr_t *sb_map) return 0; } +struct set_vf_state_cookie { + struct bnx2x_virtf *vf; + u8 state; +}; + +void bnx2x_set_vf_state(void *cookie) +{ + struct set_vf_state_cookie *p = (struct set_vf_state_cookie *)cookie; + + p->vf->state = p->state; +} + /* VFOP close (teardown the queues, delete mcasts and close HW) */ static void bnx2x_vfop_close(struct bnx2x *bp, struct bnx2x_virtf *vf) { @@ -2836,7 +2848,19 @@ static void bnx2x_vfop_close(struct bnx2x *bp, struct bnx2x_virtf *vf) op_err: BNX2X_ERR("VF[%d] CLOSE error: rc %d\n", vf->abs_vfid, vfop->rc); op_done: - vf->state = VF_ACQUIRED; + + /* need to make sure there are no outstanding stats ramrods which may + * cause the device to access the VF's stats buffer which it will free + * as soon as we return from the close flow. + */ + { + struct set_vf_state_cookie cookie; + + cookie.vf = vf; + cookie.state = VF_ACQUIRED; + bnx2x_stats_safe_exec(bp, bnx2x_set_vf_state, &cookie); + } + DP(BNX2X_MSG_IOV, "set state to acquired\n"); bnx2x_vfop_end(bp, vf, vfop); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c index fed9b15..86436c7 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c @@ -1993,3 +1993,14 @@ void bnx2x_afex_collect_stats(struct bnx2x *bp, void *void_afex_stats, estats->mac_discard); } } + +void bnx2x_stats_safe_exec(struct bnx2x *bp, + void (func_to_exec)(void *cookie), + void *cookie){ + if (down_timeout(&bp->stats_sema, HZ/10)) + BNX2X_ERR("Unable to acquire stats lock\n"); + bnx2x_stats_comp(bp); + func_to_exec(cookie); + __bnx2x_stats_start(bp); + up(&bp->stats_sema); +} diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h index 853824d..f358450 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h @@ -539,6 +539,9 @@ struct bnx2x; void bnx2x_memset_stats(struct bnx2x *bp); void bnx2x_stats_init(struct bnx2x *bp); void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event); +void bnx2x_stats_safe_exec(struct bnx2x *bp, + void (func_to_exec)(void *cookie), + void *cookie); /** * bnx2x_save_statistics - save statistics when unloading. -- cgit v0.10.2 From fb615499f0ad28ed74201c1cdfddf9e64e205424 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 27 Aug 2013 12:03:01 +0200 Subject: ALSA: opti9xx: Fix conflicting driver object name The recent commit to delay the release of kobject triggered NULL dereferences of opti9xx drivers. The cause is that all snd-opti92x-ad1848, snd-opti92x-cs4231 and snd-opti93x drivers register the PnP card driver with the very same name, and also snd-opti92x-ad1848 and -cs4231 drivers register the ISA driver with the same name, too. When these drivers are built in, quick "register-release-and-re-register" actions occur, and this results in Oops because of the same name is assigned to the kobject. The fix is simply to assign individual names. As a bonus, by using KBUILD_MODNAME, the patch reduces more lines than it adds. The fix is based on the suggestion by Russell King. 
Reported-and-tested-by: Fengguang Wu Cc: Signed-off-by: Takashi Iwai diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c index 103b333..6effe99 100644 --- a/sound/isa/opti9xx/opti92x-ad1848.c +++ b/sound/isa/opti9xx/opti92x-ad1848.c @@ -173,11 +173,7 @@ MODULE_DEVICE_TABLE(pnp_card, snd_opti9xx_pnpids); #endif /* CONFIG_PNP */ -#ifdef OPTi93X -#define DEV_NAME "opti93x" -#else -#define DEV_NAME "opti92x" -#endif +#define DEV_NAME KBUILD_MODNAME static char * snd_opti9xx_names[] = { "unknown", @@ -1167,7 +1163,7 @@ static int snd_opti9xx_pnp_resume(struct pnp_card_link *pcard) static struct pnp_card_driver opti9xx_pnpc_driver = { .flags = PNP_DRIVER_RES_DISABLE, - .name = "opti9xx", + .name = DEV_NAME, .id_table = snd_opti9xx_pnpids, .probe = snd_opti9xx_pnp_probe, .remove = snd_opti9xx_pnp_remove, -- cgit v0.10.2 From 302a50bc941010d7a67f288fd0db31981e4d722d Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 28 Aug 2013 08:47:14 +0200 Subject: xfrm: Fix potential null pointer dereference in xdst_queue_output The net_device might be not set on the skb when we try refcounting. This leads to a null pointer dereference in xdst_queue_output(). It turned out that the refcount to the net_device is not needed after all. The dst_entry has a refcount to the net_device before we queue the skb, so it can't go away. Therefore we can remove the refcount on queueing to fix the null pointer dereference. Signed-off-by: Steffen Klassert diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e52cab3..f77c371 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -320,10 +320,8 @@ static void xfrm_queue_purge(struct sk_buff_head *list) { struct sk_buff *skb; - while ((skb = skb_dequeue(list)) != NULL) { - dev_put(skb->dev); + while ((skb = skb_dequeue(list)) != NULL) kfree_skb(skb); - } } /* Rule must be locked. Release descentant resources, announce @@ -1758,7 +1756,6 @@ static void xfrm_policy_queue_process(unsigned long arg) struct sk_buff *skb; struct sock *sk; struct dst_entry *dst; - struct net_device *dev; struct xfrm_policy *pol = (struct xfrm_policy *)arg; struct xfrm_policy_queue *pq = &pol->polq; struct flowi fl; @@ -1805,7 +1802,6 @@ static void xfrm_policy_queue_process(unsigned long arg) dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path, &fl, skb->sk, 0); if (IS_ERR(dst)) { - dev_put(skb->dev); kfree_skb(skb); continue; } @@ -1814,9 +1810,7 @@ static void xfrm_policy_queue_process(unsigned long arg) skb_dst_drop(skb); skb_dst_set(skb, dst); - dev = skb->dev; err = dst_output(skb); - dev_put(dev); } return; @@ -1839,7 +1833,6 @@ static int xdst_queue_output(struct sk_buff *skb) } skb_dst_force(skb); - dev_hold(skb->dev); spin_lock_bh(&pq->hold_queue.lock); -- cgit v0.10.2 From f0cc6ffb8ce8961db587e5072168cac0cbc25f05 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 28 Aug 2013 09:18:05 -0700 Subject: Revert "fs: Allow unprivileged linkat(..., AT_EMPTY_PATH) aka flink" This reverts commit bb2314b47996491bbc5add73633905c3120b6268. It wasn't necessarily wrong per se, but we're still busily discussing the exact details of this all, so I'm going to revert it for now. It's true that you can already do flink() through /proc and that flink() isn't new. But as Brad Spengler points out, some secure environments do not mount proc, and flink adds a new interface that can avoid path lookup of the source for those kinds of environments. 
We may re-do this (and even mark it for stable backporting back in 3.11 and possibly earlier) once the whole discussion about the interface is done. Cc: Andy Lutomirski Cc: Al Viro Cc: Oleg Nesterov Cc: Brad Spengler Signed-off-by: Linus Torvalds diff --git a/fs/namei.c b/fs/namei.c index 89a612e..8b61d10 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3671,11 +3671,15 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) return -EINVAL; /* - * Using empty names is equivalent to using AT_SYMLINK_FOLLOW - * on /proc/self/fd/. + * To use null names we require CAP_DAC_READ_SEARCH + * This ensures that not everyone will be able to create + * handlink using the passed filedescriptor. */ - if (flags & AT_EMPTY_PATH) + if (flags & AT_EMPTY_PATH) { + if (!capable(CAP_DAC_READ_SEARCH)) + return -ENOENT; how = LOOKUP_EMPTY; + } if (flags & AT_SYMLINK_FOLLOW) how |= LOOKUP_FOLLOW; -- cgit v0.10.2 From 347e2233b7667e336d9f671f1a52dfa3f0416e2c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 28 Aug 2013 13:35:13 -0400 Subject: SUNRPC: Fix memory corruption issue on 32-bit highmem systems Some architectures, such as ARM-32, do not return the same base address when you call kmap_atomic() twice on the same page. This causes problems for the memmove() call in the XDR helper routine "_shift_data_right_pages()", since it defeats the detection of overlapping memory ranges, and has been seen to corrupt memory. The fix is to distinguish between the case where we're doing an inter-page copy or not. In the former case we know that the memory ranges cannot possibly overlap, so we can additionally micro-optimise by replacing memmove() with memcpy(). Reported-by: Mark Young Reported-by: Matt Craighead Cc: Bruce Fields Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Tested-by: Matt Craighead diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 75edcfa..1504bb1 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -207,10 +207,13 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, pgfrom_base -= copy; vto = kmap_atomic(*pgto); - vfrom = kmap_atomic(*pgfrom); - memmove(vto + pgto_base, vfrom + pgfrom_base, copy); + if (*pgto != *pgfrom) { + vfrom = kmap_atomic(*pgfrom); + memcpy(vto + pgto_base, vfrom + pgfrom_base, copy); + kunmap_atomic(vfrom); + } else + memmove(vto + pgto_base, vto + pgfrom_base, copy); flush_dcache_page(*pgto); - kunmap_atomic(vfrom); kunmap_atomic(vto); } while ((len -= copy) != 0); -- cgit v0.10.2 From 9b96309c5b0b9e466773c07a5bc8b7b68fcf010a Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 23 Aug 2013 12:44:55 -0700 Subject: genl: Fix genl dumpit() locking. In the case of a genl family with parallel ops off, the dumpit() callback is expected to run under genl_lock, but commit def3117493eafd9df (genl: Allow concurrent genl callbacks.) changed this behaviour so that only the first dumpit() op was called under genl_lock. For subsequent dumps, only nlk->cb_lock was taken. The following patch fixes it by defining locked dumpit() and done() callbacks which take care of genl locking. CC: Jesse Gross CC: Johannes Berg Signed-off-by: Pravin B Shelar Signed-off-by: David S.
Miller diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 512718a..7e0f4d1 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -544,6 +544,30 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); +static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct genl_ops *ops = cb->data; + int rc; + + genl_lock(); + rc = ops->dumpit(skb, cb); + genl_unlock(); + return rc; +} + +static int genl_lock_done(struct netlink_callback *cb) +{ + struct genl_ops *ops = cb->data; + int rc = 0; + + if (ops->done) { + genl_lock(); + rc = ops->done(cb); + genl_unlock(); + } + return rc; +} + static int genl_family_rcv_msg(struct genl_family *family, struct sk_buff *skb, struct nlmsghdr *nlh) @@ -572,15 +596,32 @@ static int genl_family_rcv_msg(struct genl_family *family, return -EPERM; if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { - struct netlink_dump_control c = { - .dump = ops->dumpit, - .done = ops->done, - }; + int rc; if (ops->dumpit == NULL) return -EOPNOTSUPP; - return netlink_dump_start(net->genl_sock, skb, nlh, &c); + if (!family->parallel_ops) { + struct netlink_dump_control c = { + .data = ops, + .dump = genl_lock_dumpit, + .done = genl_lock_done, + }; + + genl_unlock(); + rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + genl_lock(); + + } else { + struct netlink_dump_control c = { + .dump = ops->dumpit, + .done = ops->done, + }; + + rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + } + + return rc; } if (ops->doit == NULL) -- cgit v0.10.2 From 33c6b1f6b154894321f5734e50c66621e9134e7e Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 23 Aug 2013 12:45:04 -0700 Subject: genl: Hold reference on correct module while netlink-dump. netlink dump operations take module as parameter to hold reference for entire netlink dump duration. Currently it holds ref only on genl module which is not correct when we use ops registered to genl from another module. Following patch adds module pointer to genl_ops so that netlink can hold ref count on it. CC: Jesse Gross CC: Johannes Berg Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 93024a4..8e0b6c8 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -61,6 +61,7 @@ struct genl_family { struct list_head ops_list; /* private */ struct list_head family_list; /* private */ struct list_head mcast_groups; /* private */ + struct module *module; }; /** @@ -121,9 +122,24 @@ struct genl_ops { struct list_head ops_list; }; -extern int genl_register_family(struct genl_family *family); -extern int genl_register_family_with_ops(struct genl_family *family, +extern int __genl_register_family(struct genl_family *family); + +static inline int genl_register_family(struct genl_family *family) +{ + family->module = THIS_MODULE; + return __genl_register_family(family); +} + +extern int __genl_register_family_with_ops(struct genl_family *family, struct genl_ops *ops, size_t n_ops); + +static inline int genl_register_family_with_ops(struct genl_family *family, + struct genl_ops *ops, size_t n_ops) +{ + family->module = THIS_MODULE; + return __genl_register_family_with_ops(family, ops, n_ops); +} + extern int genl_unregister_family(struct genl_family *family); extern int genl_register_ops(struct genl_family *, struct genl_ops *ops); extern int genl_unregister_ops(struct genl_family *, struct genl_ops *ops); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 7e0f4d1..0c741ce 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -364,7 +364,7 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) EXPORT_SYMBOL(genl_unregister_ops); /** - * genl_register_family - register a generic netlink family + * __genl_register_family - register a generic netlink family * @family: generic netlink family * * Registers the specified family after validating it first. Only one @@ -374,7 +374,7 @@ EXPORT_SYMBOL(genl_unregister_ops); * * Return 0 on success or a negative error code. */ -int genl_register_family(struct genl_family *family) +int __genl_register_family(struct genl_family *family) { int err = -EINVAL; @@ -430,10 +430,10 @@ errout_locked: errout: return err; } -EXPORT_SYMBOL(genl_register_family); +EXPORT_SYMBOL(__genl_register_family); /** - * genl_register_family_with_ops - register a generic netlink family + * __genl_register_family_with_ops - register a generic netlink family * @family: generic netlink family * @ops: operations to be registered * @n_ops: number of elements to register @@ -457,12 +457,12 @@ EXPORT_SYMBOL(genl_register_family); * * Return 0 on success or a negative error code. 
*/ -int genl_register_family_with_ops(struct genl_family *family, +int __genl_register_family_with_ops(struct genl_family *family, struct genl_ops *ops, size_t n_ops) { int err, i; - err = genl_register_family(family); + err = __genl_register_family(family); if (err) return err; @@ -476,7 +476,7 @@ err_out: genl_unregister_family(family); return err; } -EXPORT_SYMBOL(genl_register_family_with_ops); +EXPORT_SYMBOL(__genl_register_family_with_ops); /** * genl_unregister_family - unregister generic netlink family @@ -603,22 +603,24 @@ static int genl_family_rcv_msg(struct genl_family *family, if (!family->parallel_ops) { struct netlink_dump_control c = { + .module = family->module, .data = ops, .dump = genl_lock_dumpit, .done = genl_lock_done, }; genl_unlock(); - rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); genl_lock(); } else { struct netlink_dump_control c = { + .module = family->module, .dump = ops->dumpit, .done = ops->done, }; - rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); } return rc; -- cgit v0.10.2 From 64c3b252e9fc8256ef7b1b99ba60492163cd06e8 Mon Sep 17 00:00:00 2001 From: Byungho An Date: Sat, 24 Aug 2013 15:31:43 +0900 Subject: net: stmmac: fixed the pbl setting with DT This patch fixes the pbl (programmable burst length) setting when using DT. Even though the default pbl is 8, if there is no pbl property in the device tree file, pbl is set to 0, which causes bandwidth degradation. Signed-off-by: Byungho An Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 03de76c..1c83a44 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -71,14 +71,18 @@ static int stmmac_probe_config_dt(struct platform_device *pdev, plat->force_sf_dma_mode = 1; } - dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*dma_cfg), GFP_KERNEL); - if (!dma_cfg) - return -ENOMEM; - - plat->dma_cfg = dma_cfg; - of_property_read_u32(np, "snps,pbl", &dma_cfg->pbl); - dma_cfg->fixed_burst = of_property_read_bool(np, "snps,fixed-burst"); - dma_cfg->mixed_burst = of_property_read_bool(np, "snps,mixed-burst"); + if (of_find_property(np, "snps,pbl", NULL)) { + dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*dma_cfg), + GFP_KERNEL); + if (!dma_cfg) + return -ENOMEM; + plat->dma_cfg = dma_cfg; + of_property_read_u32(np, "snps,pbl", &dma_cfg->pbl); + dma_cfg->fixed_burst = + of_property_read_bool(np, "snps,fixed-burst"); + dma_cfg->mixed_burst = + of_property_read_bool(np, "snps,mixed-burst"); + } return 0; } -- cgit v0.10.2 From 3046e2f5b79a86044ac0a29c69610d6ac6a4b882 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Sun, 25 Aug 2013 10:23:46 +0300 Subject: net: add cpu_relax to busy poll loop Add a cpu_relax() to sk_busy_loop. Julie Cummings reported performance issues when hyperthreading is on. Arjan van de Ven observed that we should have a cpu_relax() in the busy poll loop. Reported-by: Julie Cummings Signed-off-by: Eliezer Tamir Signed-off-by: David S.
Miller diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 8a358a2..829627d 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -123,6 +123,7 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock) /* local bh are disabled so it is ok to use _BH */ NET_ADD_STATS_BH(sock_net(sk), LINUX_MIB_BUSYPOLLRXPACKETS, rc); + cpu_relax(); } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && !need_resched() && !busy_loop_timeout(end_time)); -- cgit v0.10.2 From 03803a59e32453ee5737c6096a295f748f03cc49 Mon Sep 17 00:00:00 2001 From: Rob Gardner Date: Sun, 25 Aug 2013 16:02:23 -0600 Subject: net: usb: Add HP hs2434 device to ZLP exception table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds another entry (HP hs2434 Mobile Broadband) to the list of exceptional devices that require a zero length packet in order to function properly. This list was added in commit 844e88f0. The hs2434 is manufactured by Sierra Wireless, who also produces the MC7710, which the ZLP exception list was created for in the first place. So hopefully it is just this one producer's devices that will need this workaround. Tested on a DM1-4310NR HP notebook, which does not function without this change. Signed-off-by: Rob Gardner Acked-by: Bjørn Mork Signed-off-by: David S. Miller diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 8728198..25ba7ec 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -400,6 +400,10 @@ static const struct usb_device_id mbim_devs[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68a2, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info_zlp, }, + /* HP hs2434 Mobile Broadband Module needs ZLPs */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3f0, 0x4b1d, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&cdc_mbim_info_zlp, + }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info, }, -- cgit v0.10.2 From 282a1dffc1b9976cdf1b0eea3f6f68fda23a7c7e Mon Sep 17 00:00:00 2001 From: Libo Chen Date: Mon, 26 Aug 2013 11:30:55 +0800 Subject: net: xilinx: fix memleak decrease device_node refcount np1 in err case. Signed-off-by: Libo Chen Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c index e90e1f4..64b4639 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c @@ -175,6 +175,7 @@ int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np) printk(KERN_WARNING "Setting MDIO clock divisor to " "default %d\n", DEFAULT_CLOCK_DIVISOR); clk_div = DEFAULT_CLOCK_DIVISOR; + of_node_put(np1); goto issue; } -- cgit v0.10.2 From 0f8f2aaaab0b0f9c13635cb02e7d19bdaa9aa1bb Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 28 Aug 2013 18:13:26 -0700 Subject: Add new lockref infrastructure reference implementation This introduces a new "lockref" structure that supports the concept of lockless updates of reference counts that still honor an attached spinlock. NOTE! This reference implementation is not the optimized lockless version, rather it is the fallback implementation using standard spinlocks. The actual optimized versions will be merged into 3.12, but I wanted to get the infrastructure in place and document the new interfaces. 
[ Also note that this particular commit is drastically cut-down minimal version of the original patch by Waiman. In order to properly credit the original author I'm marking Waiman as the author here, but in the end this patch bears little resemblance to the patch by Waiman. So blame any errors on me editing things down to the point where I can introduce the infrastructure before the merge window for 3.12 actually opens. - Linus ] Signed-off-by: Waiman Long Signed-off-by: Linus Torvalds diff --git a/include/linux/lockref.h b/include/linux/lockref.h new file mode 100644 index 0000000..01233e0 --- /dev/null +++ b/include/linux/lockref.h @@ -0,0 +1,71 @@ +#ifndef __LINUX_LOCKREF_H +#define __LINUX_LOCKREF_H + +/* + * Locked reference counts. + * + * These are different from just plain atomic refcounts in that they + * are atomic with respect to the spinlock that goes with them. In + * particular, there can be implementations that don't actually get + * the spinlock for the common decrement/increment operations, but they + * still have to check that the operation is done semantically as if + * the spinlock had been taken (using a cmpxchg operation that covers + * both the lock and the count word, or using memory transactions, for + * example). + */ + +#include + +struct lockref { + spinlock_t lock; + unsigned int count; +}; + +/** + * lockref_get - Increments reference count unconditionally + * @lockcnt: pointer to lockref structure + * + * This operation is only valid if you already hold a reference + * to the object, so you know the count cannot be zero. + */ +static inline void lockref_get(struct lockref *lockref) +{ + spin_lock(&lockref->lock); + lockref->count++; + spin_unlock(&lockref->lock); +} + +/** + * lockref_get_not_zero - Increments count unless the count is 0 + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count is 0 + */ +static inline int lockref_get_not_zero(struct lockref *lockref) +{ + int retval = 0; + + spin_lock(&lockref->lock); + if (lockref->count) { + lockref->count++; + retval = 1; + } + spin_unlock(&lockref->lock); + return retval; +} + +/** + * lockref_put_or_lock - decrements count unless count <= 1 before decrement + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken + */ +static inline int lockref_put_or_lock(struct lockref *lockref) +{ + spin_lock(&lockref->lock); + if (lockref->count <= 1) + return 0; + lockref->count--; + spin_unlock(&lockref->lock); + return 1; +} + +#endif /* __LINUX_LOCKREF_H */ -- cgit v0.10.2 From 98474236f72e5a8b89c14cd7c74f0bb77a4b1a99 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 28 Aug 2013 18:24:59 -0700 Subject: vfs: make the dentry cache use the lockref infrastructure This just replaces the dentry count/lock combination with the lockref structure that contains both a count and a spinlock, and does the mechanical conversion to use the lockref infrastructure. There are no semantic changes here, it's purely syntactic. The reference lockref implementation uses the spinlock exactly the same way that the old dcache code did, and the bulk of this patch is just expanding the internal "d_count" use in the dcache code to use "d_lockref.count" instead. This is purely preparation for the real change to make the reference count updates be lockless during the 3.12 merge window. 
[ As with the previous commit, this is a rewritten version of a concept originally from Waiman, so credit goes to him, blame for any errors goes to me. Waiman's patch had some semantic differences for taking advantage of the lockless update in dget_parent(), while this patch is intentionally a pure search-and-replace change with no semantic changes. - Linus ] Signed-off-by: Waiman Long Signed-off-by: Linus Torvalds diff --git a/fs/dcache.c b/fs/dcache.c index 83cfb83..b949af8 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -229,7 +229,7 @@ static void __d_free(struct rcu_head *head) */ static void d_free(struct dentry *dentry) { - BUG_ON(dentry->d_count); + BUG_ON(dentry->d_lockref.count); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); @@ -467,7 +467,7 @@ relock: } if (ref) - dentry->d_count--; + dentry->d_lockref.count--; /* * inform the fs via d_prune that this dentry is about to be * unhashed and destroyed. @@ -513,15 +513,10 @@ void dput(struct dentry *dentry) return; repeat: - if (dentry->d_count == 1) + if (dentry->d_lockref.count == 1) might_sleep(); - spin_lock(&dentry->d_lock); - BUG_ON(!dentry->d_count); - if (dentry->d_count > 1) { - dentry->d_count--; - spin_unlock(&dentry->d_lock); + if (lockref_put_or_lock(&dentry->d_lockref)) return; - } if (dentry->d_flags & DCACHE_OP_DELETE) { if (dentry->d_op->d_delete(dentry)) @@ -535,7 +530,7 @@ repeat: dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); - dentry->d_count--; + dentry->d_lockref.count--; spin_unlock(&dentry->d_lock); return; @@ -590,7 +585,7 @@ int d_invalidate(struct dentry * dentry) * We also need to leave mountpoints alone, * directory or not. */ - if (dentry->d_count > 1 && dentry->d_inode) { + if (dentry->d_lockref.count > 1 && dentry->d_inode) { if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) { spin_unlock(&dentry->d_lock); return -EBUSY; @@ -606,14 +601,12 @@ EXPORT_SYMBOL(d_invalidate); /* This must be called with d_lock held */ static inline void __dget_dlock(struct dentry *dentry) { - dentry->d_count++; + dentry->d_lockref.count++; } static inline void __dget(struct dentry *dentry) { - spin_lock(&dentry->d_lock); - __dget_dlock(dentry); - spin_unlock(&dentry->d_lock); + lockref_get(&dentry->d_lockref); } struct dentry *dget_parent(struct dentry *dentry) @@ -634,8 +627,8 @@ repeat: goto repeat; } rcu_read_unlock(); - BUG_ON(!ret->d_count); - ret->d_count++; + BUG_ON(!ret->d_lockref.count); + ret->d_lockref.count++; spin_unlock(&ret->d_lock); return ret; } @@ -718,7 +711,7 @@ restart: spin_lock(&inode->i_lock); hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); - if (!dentry->d_count) { + if (!dentry->d_lockref.count) { __dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); @@ -763,12 +756,8 @@ static void try_prune_one_dentry(struct dentry *dentry) /* Prune ancestors. */ dentry = parent; while (dentry) { - spin_lock(&dentry->d_lock); - if (dentry->d_count > 1) { - dentry->d_count--; - spin_unlock(&dentry->d_lock); + if (lockref_put_or_lock(&dentry->d_lockref)) return; - } dentry = dentry_kill(dentry, 1); } } @@ -793,7 +782,7 @@ static void shrink_dentry_list(struct list_head *list) * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. 
*/ - if (dentry->d_count) { + if (dentry->d_lockref.count) { dentry_lru_del(dentry); spin_unlock(&dentry->d_lock); continue; @@ -913,7 +902,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) dentry_lru_del(dentry); __d_shrink(dentry); - if (dentry->d_count != 0) { + if (dentry->d_lockref.count != 0) { printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%s}" " still in use (%d)" @@ -922,7 +911,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) dentry->d_inode ? dentry->d_inode->i_ino : 0UL, dentry->d_name.name, - dentry->d_count, + dentry->d_lockref.count, dentry->d_sb->s_type->name, dentry->d_sb->s_id); BUG(); @@ -933,7 +922,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) list_del(&dentry->d_u.d_child); } else { parent = dentry->d_parent; - parent->d_count--; + parent->d_lockref.count--; list_del(&dentry->d_u.d_child); } @@ -981,7 +970,7 @@ void shrink_dcache_for_umount(struct super_block *sb) dentry = sb->s_root; sb->s_root = NULL; - dentry->d_count--; + dentry->d_lockref.count--; shrink_dcache_for_umount_subtree(dentry); while (!hlist_bl_empty(&sb->s_anon)) { @@ -1147,7 +1136,7 @@ resume: * loop in shrink_dcache_parent() might not make any progress * and loop forever. */ - if (dentry->d_count) { + if (dentry->d_lockref.count) { dentry_lru_del(dentry); } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { dentry_lru_move_list(dentry, dispose); @@ -1269,7 +1258,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) smp_wmb(); dentry->d_name.name = dname; - dentry->d_count = 1; + dentry->d_lockref.count = 1; dentry->d_flags = 0; spin_lock_init(&dentry->d_lock); seqcount_init(&dentry->d_seq); @@ -1970,7 +1959,7 @@ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) goto next; } - dentry->d_count++; + dentry->d_lockref.count++; found = dentry; spin_unlock(&dentry->d_lock); break; @@ -2069,7 +2058,7 @@ again: spin_lock(&dentry->d_lock); inode = dentry->d_inode; isdir = S_ISDIR(inode->i_mode); - if (dentry->d_count == 1) { + if (dentry->d_lockref.count == 1) { if (!spin_trylock(&inode->i_lock)) { spin_unlock(&dentry->d_lock); cpu_relax(); @@ -2948,7 +2937,7 @@ resume: } if (!(dentry->d_flags & DCACHE_GENOCIDE)) { dentry->d_flags |= DCACHE_GENOCIDE; - dentry->d_count--; + dentry->d_lockref.count--; } spin_unlock(&dentry->d_lock); } @@ -2956,7 +2945,7 @@ resume: struct dentry *child = this_parent; if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { this_parent->d_flags |= DCACHE_GENOCIDE; - this_parent->d_count--; + this_parent->d_lockref.count--; } this_parent = try_to_ascend(this_parent, locked, seq); if (!this_parent) diff --git a/fs/namei.c b/fs/namei.c index 8b61d10..7720fbd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -536,8 +536,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) * a reference at this point. 
*/ BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); - BUG_ON(!parent->d_count); - parent->d_count++; + BUG_ON(!parent->d_lockref.count); + parent->d_lockref.count++; spin_unlock(&dentry->d_lock); } spin_unlock(&parent->d_lock); @@ -3327,7 +3327,7 @@ void dentry_unhash(struct dentry *dentry) { shrink_dcache_parent(dentry); spin_lock(&dentry->d_lock); - if (dentry->d_count == 1) + if (dentry->d_lockref.count == 1) __d_drop(dentry); spin_unlock(&dentry->d_lock); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4a12532..efdc944 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -9,6 +9,7 @@ #include #include #include +#include struct nameidata; struct path; @@ -100,6 +101,8 @@ extern unsigned int full_name_hash(const unsigned char *, unsigned int); # endif #endif +#define d_lock d_lockref.lock + struct dentry { /* RCU lookup touched fields */ unsigned int d_flags; /* protected by d_lock */ @@ -112,8 +115,7 @@ struct dentry { unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ /* Ref lookup also touches following */ - unsigned int d_count; /* protected by d_lock */ - spinlock_t d_lock; /* per dentry lock */ + struct lockref d_lockref; /* per-dentry lock and refcount */ const struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ unsigned long d_time; /* used by d_revalidate */ @@ -318,7 +320,7 @@ static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq) assert_spin_locked(&dentry->d_lock); if (!read_seqcount_retry(&dentry->d_seq, seq)) { ret = 1; - dentry->d_count++; + dentry->d_lockref.count++; } return ret; @@ -326,7 +328,7 @@ static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq) static inline unsigned d_count(const struct dentry *dentry) { - return dentry->d_count; + return dentry->d_lockref.count; } /* validate "insecure" dentry pointer */ @@ -357,17 +359,14 @@ extern char *dentry_path(struct dentry *, char *, int); static inline struct dentry *dget_dlock(struct dentry *dentry) { if (dentry) - dentry->d_count++; + dentry->d_lockref.count++; return dentry; } static inline struct dentry *dget(struct dentry *dentry) { - if (dentry) { - spin_lock(&dentry->d_lock); - dget_dlock(dentry); - spin_unlock(&dentry->d_lock); - } + if (dentry) + lockref_get(&dentry->d_lockref); return dentry; } -- cgit v0.10.2 From 84a78a6504f5c5394a8e558702e5b54131f01d14 Mon Sep 17 00:00:00 2001 From: Nathan Zimmer Date: Wed, 28 Aug 2013 16:35:14 -0700 Subject: timer_list: correct the iterator for timer_list Correct an issue with /proc/timer_list reported by Holger. When reading from the proc file with a sufficiently small buffer, 2k so not really that small, one could get hung trying to read the file a chunk at a time. The timer_list_start function failed to account for the possibility that the offset was adjusted outside of timer_list_next.
Signed-off-by: Nathan Zimmer Reported-by: Holger Hans Peter Freyther Cc: John Stultz Cc: Thomas Gleixner Cc: Berke Durak Cc: Jeff Layton Tested-by: Al Viro Cc: # 3.10.x Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 3bdf283..61ed862 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -265,10 +265,9 @@ static inline void timer_list_header(struct seq_file *m, u64 now) static int timer_list_show(struct seq_file *m, void *v) { struct timer_list_iter *iter = v; - u64 now = ktime_to_ns(ktime_get()); if (iter->cpu == -1 && !iter->second_pass) - timer_list_header(m, now); + timer_list_header(m, iter->now); else if (!iter->second_pass) print_cpu(m, iter->cpu, iter->now); #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -298,33 +297,41 @@ void sysrq_timer_list_show(void) return; } -static void *timer_list_start(struct seq_file *file, loff_t *offset) +static void *move_iter(struct timer_list_iter *iter, loff_t offset) { - struct timer_list_iter *iter = file->private; - - if (!*offset) { - iter->cpu = -1; - iter->now = ktime_to_ns(ktime_get()); - } else if (iter->cpu >= nr_cpu_ids) { + for (; offset; offset--) { + iter->cpu = cpumask_next(iter->cpu, cpu_online_mask); + if (iter->cpu >= nr_cpu_ids) { #ifdef CONFIG_GENERIC_CLOCKEVENTS - if (!iter->second_pass) { - iter->cpu = -1; - iter->second_pass = true; - } else - return NULL; + if (!iter->second_pass) { + iter->cpu = -1; + iter->second_pass = true; + } else + return NULL; #else - return NULL; + return NULL; #endif + } } return iter; } +static void *timer_list_start(struct seq_file *file, loff_t *offset) +{ + struct timer_list_iter *iter = file->private; + + if (!*offset) + iter->now = ktime_to_ns(ktime_get()); + iter->cpu = -1; + iter->second_pass = false; + return move_iter(iter, *offset); +} + static void *timer_list_next(struct seq_file *file, void *v, loff_t *offset) { struct timer_list_iter *iter = file->private; - iter->cpu = cpumask_next(iter->cpu, cpu_online_mask); ++*offset; - return timer_list_start(file, offset); + return move_iter(iter, 1); } static void timer_list_stop(struct seq_file *seq, void *v) -- cgit v0.10.2 From aaaafb7f953c60d9c9eeb1b10ecdbe6970421b24 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 28 Aug 2013 16:35:16 -0700 Subject: Omnikey Cardman 4000: pull in ioctl.h in user header This file uses the ioctl helpers (_IOR/_IOW/etc...), so include ioctl.h for the definitions. Signed-off-by: Mike Frysinger Cc: Harald Welte Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/uapi/linux/cm4000_cs.h b/include/uapi/linux/cm4000_cs.h index bc51f77..1217f75 100644 --- a/include/uapi/linux/cm4000_cs.h +++ b/include/uapi/linux/cm4000_cs.h @@ -2,6 +2,7 @@ #define _UAPI_CM4000_H_ #include +#include #define MAX_ATR 33 -- cgit v0.10.2 From 368ae537e056acd3f751fa276f48423f06803922 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Svenning=20S=C3=B8rensen?= Date: Wed, 28 Aug 2013 16:35:17 -0700 Subject: IPC: bugfix for msgrcv with msgtyp < 0 According to 'man msgrcv': "If msgtyp is less than 0, the first message of the lowest type that is less than or equal to the absolute value of msgtyp shall be received." Bug: The kernel only returns a message if its type is 1; other messages with type < abs(msgtype) will never get returned. Fix: After having traversed the list to find the first message with the lowest type, we need to actually return that message. 
This regression was introduced by commit daaf74cf0867 ("ipc: refactor msg list search into separate function") Signed-off-by: Svenning Soerensen Reviewed-by: Peter Hurley Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/ipc/msg.c b/ipc/msg.c index bd60d7e..9f29d9e 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -839,7 +839,7 @@ static inline void free_copy(struct msg_msg *copy) static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) { - struct msg_msg *msg; + struct msg_msg *msg, *found = NULL; long count = 0; list_for_each_entry(msg, &msq->q_messages, m_list) { @@ -848,6 +848,7 @@ static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) *msgtyp, mode)) { if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) { *msgtyp = msg->m_type - 1; + found = msg; } else if (mode == SEARCH_NUMBER) { if (*msgtyp == count) return msg; @@ -857,7 +858,7 @@ static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) } } - return ERR_PTR(-EAGAIN); + return found ?: ERR_PTR(-EAGAIN); } long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, -- cgit v0.10.2 From 21ea9f5ace3a7317cc3ba1fbc749758021a83136 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Wed, 28 Aug 2013 16:35:18 -0700 Subject: drivers/base/memory.c: fix show_mem_removable() to handle missing sections "cat /sys/devices/system/memory/memory*/removable" crashed the system. The problem is that show_mem_removable() is passing a bad pfn to is_mem_section_removable(), which causes if (!node_online(page_to_nid(page))) to blow up. Why is it passing in a bad pfn? The reason is that show_mem_removable() will loop sections_per_block times. sections_per_block is 16, but mem->section_count is 8, indicating holes in this memory block. Checking that the memory section is present before checking to see if the memory section is removable fixes the problem. 
harp5-sys:~ # cat /sys/devices/system/memory/memory*/removable 0 1 1 1 1 1 1 1 1 1 1 1 1 1 BUG: unable to handle kernel paging request at ffffea00c3200000 IP: [] is_pageblock_removable_nolock+0x1/0x90 PGD 83ffd4067 PUD 37bdfce067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: autofs4 binfmt_misc rdma_ucm rdma_cm iw_cm ib_addr ib_srp scsi_transport_srp scsi_tgt ib_ipoib ib_cm ib_uverbs ib_umad iw_cxgb3 cxgb3 mdio mlx4_en mlx4_ib ib_sa mlx4_core ib_mthca ib_mad ib_core fuse nls_iso8859_1 nls_cp437 vfat fat joydev loop hid_generic usbhid hid hwperf(O) numatools(O) dm_mod iTCO_wdt ipv6 iTCO_vendor_support igb i2c_i801 ioatdma i2c_algo_bit ehci_pci pcspkr lpc_ich i2c_core ehci_hcd ptp sg mfd_core dca rtc_cmos pps_core mperf button xhci_hcd sd_mod crc_t10dif usbcore usb_common scsi_dh_emc scsi_dh_hp_sw scsi_dh_alua scsi_dh_rdac scsi_dh gru(O) xvma(O) xfs crc32c libcrc32c thermal sata_nv processor piix mptsas mptscsih scsi_transport_sas mptbase megaraid_sas fan thermal_sys hwmon ext3 jbd ata_piix ahci libahci libata scsi_mod CPU: 4 PID: 5991 Comm: cat Tainted: G O 3.11.0-rc5-rja-uv+ #10 Hardware name: SGI UV2000/ROMLEY, BIOS SGI UV 2000/3000 series BIOS 01/15/2013 task: ffff88081f034580 ti: ffff880820022000 task.ti: ffff880820022000 RIP: 0010:[] [] is_pageblock_removable_nolock+0x1/0x90 RSP: 0018:ffff880820023df8 EFLAGS: 00010287 RAX: 0000000000040000 RBX: ffffea00c3200000 RCX: 0000000000000004 RDX: ffffea00c30b0000 RSI: 00000000001c0000 RDI: ffffea00c3200000 RBP: ffff880820023e38 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000001 R12: ffffea00c33c0000 R13: 0000160000000000 R14: 6db6db6db6db6db7 R15: 0000000000000001 FS: 00007ffff7fb2700(0000) GS:ffff88083fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffea00c3200000 CR3: 000000081b954000 CR4: 00000000000407e0 Call Trace: show_mem_removable+0x41/0x70 dev_attr_show+0x2a/0x60 sysfs_read_file+0xf7/0x1c0 vfs_read+0xc8/0x130 SyS_read+0x5d/0xa0 system_call_fastpath+0x16/0x1b Signed-off-by: Russ Anderson Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Yinghai Lu Reviewed-by: Yasuaki Ishimatsu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 2b7813e..ec386ee 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -141,6 +141,8 @@ static ssize_t show_mem_removable(struct device *dev, container_of(dev, struct memory_block, dev); for (i = 0; i < sections_per_block; i++) { + if (!present_section_nr(mem->start_section_nr + i)) + continue; pfn = section_nr_to_pfn(mem->start_section_nr + i); ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION); } -- cgit v0.10.2 From 6f6b8951897e487ea6f77b90ea01f70a9c363770 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 28 Aug 2013 16:35:20 -0700 Subject: memcg: check that kmem_cache has memcg_params before accessing it If the system had a few memory groups and all of them were destroyed, memcg_limited_groups_array_size has non-zero value, but all new caches are created without memcg_params, because memcg_kmem_enabled() returns false. We try to enumirate child caches in a few places and all of them are potentially dangerous. For example my kernel is compiled with CONFIG_SLAB and it crashed when I tryed to mount a NFS share after a few experiments with kmemcg. 
BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [] do_tune_cpucache+0x8a/0xd0 PGD b942a067 PUD b999f067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: fscache(+) ip6table_filter ip6_tables iptable_filter ip_tables i2c_piix4 pcspkr virtio_net virtio_balloon i2c_core floppy CPU: 0 PID: 357 Comm: modprobe Not tainted 3.11.0-rc7+ #59 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8800b9f98240 ti: ffff8800ba32e000 task.ti: ffff8800ba32e000 RIP: 0010:[] [] do_tune_cpucache+0x8a/0xd0 RSP: 0018:ffff8800ba32fb70 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006 RDX: 0000000000000000 RSI: ffff8800b9f98910 RDI: 0000000000000246 RBP: ffff8800ba32fba0 R08: 0000000000000002 R09: 0000000000000004 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000010 R13: 0000000000000008 R14: 00000000000000d0 R15: ffff8800375d0200 FS: 00007f55f1378740(0000) GS:ffff8800bfa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f24feba57a0 CR3: 0000000037b51000 CR4: 00000000000006f0 Call Trace: enable_cpucache+0x49/0x100 setup_cpu_cache+0x215/0x280 __kmem_cache_create+0x2fa/0x450 kmem_cache_create_memcg+0x214/0x350 kmem_cache_create+0x2b/0x30 fscache_init+0x19b/0x230 [fscache] do_one_initcall+0xfa/0x1b0 load_module+0x1c41/0x26d0 SyS_finit_module+0x86/0xb0 system_call_fastpath+0x16/0x1b Signed-off-by: Andrey Vagin Cc: Pekka Enberg Cc: Christoph Lameter Cc: Glauber Costa Cc: Joonsoo Kim Cc: Michal Hocko Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/slab.h b/mm/slab.h index 620ceed..a535033 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -162,6 +162,8 @@ static inline const char *cache_name(struct kmem_cache *s) static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx) { + if (!s->memcg_params) + return NULL; return s->memcg_params->memcg_caches[idx]; } -- cgit v0.10.2 From 49fa8140e487b492da24c76ac3cccad0a0ae63e4 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Wed, 28 Aug 2013 16:35:21 -0700 Subject: fs/ocfs2/super.c: Use bigger nodestr to accomodate 32-bit node numbers While using pacemaker/corosync, the node numbers are generated using IP address as opposed to serial node number generation. This may not fit in a 8-byte string. Use a bigger string to print the complete node number. Signed-off-by: Goldwyn Rodrigues Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 854d809..121da2d 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1022,7 +1022,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode = NULL; struct ocfs2_super *osb = NULL; struct buffer_head *bh = NULL; - char nodestr[8]; + char nodestr[12]; struct ocfs2_blockcheck_stats stats; trace_ocfs2_fill_super(sb, data, silent); -- cgit v0.10.2 From b22ce2785d97423846206cceec4efee0c4afd980 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 28 Aug 2013 17:33:37 -0400 Subject: workqueue: cond_resched() after processing each work item If !PREEMPT, a kworker running work items back to back can hog CPU. This becomes dangerous when a self-requeueing work item which is waiting for something to happen races against stop_machine. 
Such self-requeueing work item would requeue itself indefinitely hogging the kworker and CPU it's running on while stop_machine would wait for that CPU to enter stop_machine while preventing anything else from happening on all other CPUs. The two would deadlock. Jamie Liu reports that this deadlock scenario exists around scsi_requeue_run_queue() and libata port multiplier support, where one port may exclude command processing from other ports. With the right timing, scsi_requeue_run_queue() can end up requeueing itself trying to execute an IO which is asked to be retried while another device has an exclusive access, which in turn can't make forward progress due to stop_machine. Fix it by invoking cond_resched() after executing each work item. Signed-off-by: Tejun Heo Reported-by: Jamie Liu References: http://thread.gmane.org/gmane.linux.kernel/1552567 Cc: stable@vger.kernel.org -- kernel/workqueue.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7f5d4be..e93f7b9 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2201,6 +2201,15 @@ __acquires(&pool->lock) dump_stack(); } + /* + * The following prevents a kworker from hogging CPU on !PREEMPT + * kernels, where a requeueing work item waiting for something to + * happen could deadlock with stop_machine as such work item could + * indefinitely requeue itself while all other CPUs are trapped in + * stop_machine. + */ + cond_resched(); + spin_lock_irq(&pool->lock); /* clear cpu intensive status */ -- cgit v0.10.2 From bb78a92f47696b2da49f2692b6a9fa56d07c444a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 28 Aug 2013 16:31:23 -0700 Subject: cgroup: fix rmdir EBUSY regression in 3.11 On 3.11-rc we are seeing cgroup directories left behind when they should have been removed. Here's a trivial reproducer: cd /sys/fs/cgroup/memory mkdir parent parent/child; rmdir parent/child parent rmdir: failed to remove `parent': Device or resource busy It's because cgroup_destroy_locked() (step 1 of destruction) leaves cgroup on parent's children list, letting cgroup_offline_fn() (step 2 of destruction) remove it; but step 2 is run by work queue, which may not yet have removed the children when parent destruction checks the list. Fix that by checking through a non-empty list of children: if every one of them has already been marked CGRP_DEAD, then it's safe to proceed: those children are invisible to userspace, and should not obstruct rmdir. (I didn't see any reason to keep the cgrp->children checks under the unrelated css_set_lock, so moved them out.) tj: Flattened nested ifs a bit and updated comment so that it's correct on both for-3.11-fixes and for-3.12. Signed-off-by: Hugh Dickins Signed-off-by: Tejun Heo diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 781845a..e919633 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4480,6 +4480,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) struct dentry *d = cgrp->dentry; struct cgroup_event *event, *tmp; struct cgroup_subsys *ss; + struct cgroup *child; bool empty; lockdep_assert_held(&d->d_inode->i_mutex); @@ -4490,12 +4491,28 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) * @cgrp from being removed while __put_css_set() is in progress. */ read_lock(&css_set_lock); - empty = list_empty(&cgrp->cset_links) && list_empty(&cgrp->children); + empty = list_empty(&cgrp->cset_links); read_unlock(&css_set_lock); if (!empty) return -EBUSY; /* + * Make sure there's no live children. 
We can't test ->children + * emptiness as dead children linger on it while being destroyed; + * otherwise, "rmdir parent/child parent" may fail with -EBUSY. + */ + empty = true; + rcu_read_lock(); + list_for_each_entry_rcu(child, &cgrp->children, sibling) { + empty = cgroup_is_dead(child); + if (!empty) + break; + } + rcu_read_unlock(); + if (!empty) + return -EBUSY; + + /* * Block new css_tryget() by killing css refcnts. cgroup core * guarantees that, by the time ->css_offline() is invoked, no new * css reference will be given out via css_tryget(). We can't -- cgit v0.10.2 From fa46c7984092f3dbdbb3bcd7338d81a1168d9d2b Mon Sep 17 00:00:00 2001 From: Mischa Jonker Date: Thu, 29 Aug 2013 09:29:40 -0700 Subject: Input: i8042 - disable the driver on ARC platforms It causes crashes when enabled, and we don't have such a peripheral anyway on ARC platforms. Signed-off-by: Mischa Jonker Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig index 94c17c2..1e691a3 100644 --- a/drivers/input/serio/Kconfig +++ b/drivers/input/serio/Kconfig @@ -22,7 +22,8 @@ config SERIO_I8042 tristate "i8042 PC Keyboard controller" if EXPERT || !X86 default y depends on !PARISC && (!ARM || ARCH_SHARK || FOOTBRIDGE_HOST) && \ - (!SUPERH || SH_CAYMAN) && !M68K && !BLACKFIN && !S390 + (!SUPERH || SH_CAYMAN) && !M68K && !BLACKFIN && !S390 && \ + !ARC help i8042 is the chip over which the standard AT keyboard and PS/2 mouse are connected to the computer. If you use these devices, -- cgit v0.10.2 From 29eb51a728f9a67a1e1140490fa188561de56588 Mon Sep 17 00:00:00 2001 From: Barry Song <21cnbao@gmail.com> Date: Tue, 6 Aug 2013 13:37:13 +0800 Subject: irqchip: sirf: move from legacy mode to linear irqdomain The series of patches for the irqdomain core in 3.11 broke sirf irq, which uses legacy mapping; all users fail in the new kernel while setting up irqs. This patch moves to a linear irqdomain and drops the old legacy irqdomain code since we don't need it any more, and at the same time it also fixes the broken interrupts of sirfsoc in 3.11. On the other hand, we actually only have 64 interrupt sources for prima2 and atlas6, but there are 128 interrupt sources for marco, which uses GIC. In the legacy code, sirf gpio also used a legacy irqdomain, so to make the gpio interrupt mapping independent of whether the SoC is prima2/atlas6/marco and keep a unified number, we enlarged the prima2/atlas6 interrupt number to 128. We don't need this workaround any more as sirf gpio has also moved to linear mode, so we move SIRFSOC_NUM_IRQS back to 64 too.
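For readers unfamiliar with the conversion, the pattern the driver switches to looks roughly like the sketch below; the names, register offset and probe glue are placeholders, not the driver's actual code (the real change follows in the diff).

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/of.h>

#define EXAMPLE_NUM_IRQS        64

static struct irq_domain *example_irqdomain;

static int __init example_alloc_gc(void __iomem *base,
                                   unsigned int irq_start, unsigned int num)
{
        struct irq_chip_generic *gc;
        struct irq_chip_type *ct;
        unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
        int ret;

        /* Generic chips are owned by the domain, one per 32-interrupt bank. */
        ret = irq_alloc_domain_generic_chips(example_irqdomain, num, 1,
                                             "example-intc", handle_level_irq,
                                             clr, 0, IRQ_GC_INIT_MASK_CACHE);
        if (ret)
                return ret;

        gc = irq_get_domain_generic_chip(example_irqdomain, irq_start);
        gc->reg_base = base;
        ct = gc->chip_types;
        ct->chip.irq_mask = irq_gc_mask_clr_bit;
        ct->chip.irq_unmask = irq_gc_mask_set_bit;
        ct->regs.mask = 0x18;           /* placeholder mask-register offset */
        return 0;
}

static int __init example_irq_init(struct device_node *np, void __iomem *base)
{
        /*
         * Linear domain: hwirq->virq mappings are created on demand instead
         * of pre-allocating a fixed descriptor range as the legacy API did.
         */
        example_irqdomain = irq_domain_add_linear(np, EXAMPLE_NUM_IRQS,
                                                  &irq_generic_chip_ops, base);
        if (!example_irqdomain)
                return -ENOMEM;

        example_alloc_gc(base, 0, 32);
        example_alloc_gc(base + 4, 32, EXAMPLE_NUM_IRQS - 32);
        return 0;
}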
Signed-off-by: Barry Song Signed-off-by: Olof Johansson diff --git a/drivers/irqchip/irq-sirfsoc.c b/drivers/irqchip/irq-sirfsoc.c index 69ea44e..4851afa 100644 --- a/drivers/irqchip/irq-sirfsoc.c +++ b/drivers/irqchip/irq-sirfsoc.c @@ -23,7 +23,7 @@ #define SIRFSOC_INT_RISC_LEVEL1 0x0024 #define SIRFSOC_INIT_IRQ_ID 0x0038 -#define SIRFSOC_NUM_IRQS 128 +#define SIRFSOC_NUM_IRQS 64 static struct irq_domain *sirfsoc_irqdomain; @@ -32,15 +32,18 @@ sirfsoc_alloc_gc(void __iomem *base, unsigned int irq_start, unsigned int num) { struct irq_chip_generic *gc; struct irq_chip_type *ct; + int ret; + unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; - gc = irq_alloc_generic_chip("SIRFINTC", 1, irq_start, base, handle_level_irq); - ct = gc->chip_types; + ret = irq_alloc_domain_generic_chips(sirfsoc_irqdomain, num, 1, "irq_sirfsoc", + handle_level_irq, clr, 0, IRQ_GC_INIT_MASK_CACHE); + gc = irq_get_domain_generic_chip(sirfsoc_irqdomain, irq_start); + gc->reg_base = base; + ct = gc->chip_types; ct->chip.irq_mask = irq_gc_mask_clr_bit; ct->chip.irq_unmask = irq_gc_mask_set_bit; ct->regs.mask = SIRFSOC_INT_RISC_MASK0; - - irq_setup_generic_chip(gc, IRQ_MSK(num), IRQ_GC_INIT_MASK_CACHE, IRQ_NOREQUEST, 0); } static asmlinkage void __exception_irq_entry sirfsoc_handle_irq(struct pt_regs *regs) @@ -60,9 +63,8 @@ static int __init sirfsoc_irq_init(struct device_node *np, struct device_node *p if (!base) panic("unable to map intc cpu registers\n"); - /* using legacy because irqchip_generic does not work with linear */ - sirfsoc_irqdomain = irq_domain_add_legacy(np, SIRFSOC_NUM_IRQS, 0, 0, - &irq_domain_simple_ops, base); + sirfsoc_irqdomain = irq_domain_add_linear(np, SIRFSOC_NUM_IRQS, + &irq_generic_chip_ops, base); sirfsoc_alloc_gc(base, 0, 32); sirfsoc_alloc_gc(base + 4, 32, SIRFSOC_NUM_IRQS - 32); -- cgit v0.10.2 From f8ab658b5da3fd11893cad085e0e21b67987c10b Mon Sep 17 00:00:00 2001 From: Barry Song <21cnbao@gmail.com> Date: Tue, 6 Aug 2013 13:37:14 +0800 Subject: arm: prima2: drop nr_irqs in mach as we moved to linear irqdomain We don't need nr_irqs in the machine descriptors any more after moving to a linear irqdomain for the sirfsoc irqchip, so drop them. Signed-off-by: Barry Song Signed-off-by: Olof Johansson diff --git a/arch/arm/mach-prima2/common.c b/arch/arm/mach-prima2/common.c index 2c70f74..e110b6d 100644 --- a/arch/arm/mach-prima2/common.c +++ b/arch/arm/mach-prima2/common.c @@ -42,7 +42,6 @@ static const char *atlas6_dt_match[] __initdata = { DT_MACHINE_START(ATLAS6_DT, "Generic ATLAS6 (Flattened Device Tree)") /* Maintainer: Barry Song */ - .nr_irqs = 128, .map_io = sirfsoc_map_io, .init_time = sirfsoc_init_time, .init_late = sirfsoc_init_late, @@ -59,7 +58,6 @@ static const char *prima2_dt_match[] __initdata = { DT_MACHINE_START(PRIMA2_DT, "Generic PRIMA2 (Flattened Device Tree)") /* Maintainer: Barry Song */ - .nr_irqs = 128, .map_io = sirfsoc_map_io, .init_time = sirfsoc_init_time, .dma_zone_size = SZ_256M, -- cgit v0.10.2 From c7781a6e3c4a9a17e144ec2db00ebfea327bd627 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Tue, 27 Aug 2013 12:20:40 +0400 Subject: tcp: initialize rcv_tstamp for restored sockets u32 rcv_tstamp; /* timestamp of last received ACK */ Its value is used in tcp_retransmit_timer, which closes the socket if the last ack was received more than TCP_RTO_MAX ago. Currently rcv_tstamp is initialized to zero, and if tcp_retransmit_timer is called before receiving the first ack, the connection is closed. This patch initializes rcv_tstamp to the current timestamp when a socket is restored.
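The shape of the check that bites is roughly the following; this is a paraphrased sketch of the logic in tcp_retransmit_timer() (the helper name is invented for illustration), not the verbatim kernel code.

#include <net/tcp.h>

/*
 * Sketch only: the retransmit timer treats "time since the last ACK" as
 * tcp_time_stamp - tp->rcv_tstamp and gives up past TCP_RTO_MAX.  On a
 * freshly restored socket rcv_tstamp is still 0, so this difference is
 * effectively the whole jiffies count and the very first timer fire
 * tears the connection down; the fix gives repaired sockets a sane
 * baseline in tcp_connect_init().
 */
static bool example_last_ack_too_old(const struct tcp_sock *tp)
{
        return tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX;
}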
Cc: Pavel Emelyanov Cc: Eric Dumazet Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Reported-by: Cyrill Gorcunov Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 92fde8d..e297299 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2814,6 +2814,8 @@ void tcp_connect_init(struct sock *sk) if (likely(!tp->repair)) tp->rcv_nxt = 0; + else + tp->rcv_tstamp = tcp_time_stamp; tp->rcv_wup = tp->rcv_nxt; tp->copied_seq = tp->rcv_nxt; -- cgit v0.10.2 From e3e12028315749b7fa2edbc37328e5847be9ede9 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Tue, 27 Aug 2013 12:21:55 +0400 Subject: tcp: don't apply tsoffset if rcv_tsecr is zero The zero value means that tsecr is not valid, so it's a special case. tsoffset is used to customize tcp_time_stamp for one socket. tsoffset is usually zero, it's used when a socket was moved from one host to another host. Currently this issue affects logic of tcp_rcv_rtt_measure_ts. Due to incorrect value of rcv_tsecr, tcp_rcv_rtt_measure_ts sets rto to TCP_RTO_MAX. Cc: Pavel Emelyanov Cc: Eric Dumazet Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Reported-by: Cyrill Gorcunov Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 28af45a..3ca2139 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3535,7 +3535,10 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr ++ptr; tp->rx_opt.rcv_tsval = ntohl(*ptr); ++ptr; - tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset; + if (*ptr) + tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset; + else + tp->rx_opt.rcv_tsecr = 0; return true; } return false; @@ -3560,7 +3563,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, } tcp_parse_options(skb, &tp->rx_opt, 1, NULL); - if (tp->rx_opt.saw_tstamp) + if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; return true; @@ -5316,7 +5319,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, int saved_clamp = tp->rx_opt.mss_clamp; tcp_parse_options(skb, &tp->rx_opt, 0, &foc); - if (tp->rx_opt.saw_tstamp) + if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; if (th->ack) { -- cgit v0.10.2 From c27c9322d015dc1d9dfdf31724fca71c0476c4d1 Mon Sep 17 00:00:00 2001 From: Chris Clark Date: Tue, 27 Aug 2013 12:02:15 -0600 Subject: ipv4: sendto/hdrincl: don't use destination address found in header ipv4: raw_sendmsg: don't use header's destination address A sendto() regression was bisected and found to start with commit f8126f1d5136be1 (ipv4: Adjust semantics of rt->rt_gateway.) The problem is that it tries to ARP-lookup the constructed packet's destination address rather than the explicitly provided address. Fix this using FLOWI_FLAG_KNOWN_NH so that given nexthop is used. cf. commit 2ad5b9e4bd314fc685086b99e90e5de3bc59e26b Reported-by: Chris Clark Bisected-by: Chris Clark Tested-by: Chris Clark Suggested-by: Julian Anastasov Signed-off-by: Chris Clark Signed-off-by: David S. Miller diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index dd44e0a..61e60d6 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -571,7 +571,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, inet->hdrincl ? 
IPPROTO_RAW : sk->sk_protocol, - inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, + inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP | + (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); if (!inet->hdrincl) { -- cgit v0.10.2 From 1f324e38870cc09659cf23bc626f1b8869e201f2 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 28 Aug 2013 01:07:25 +0200 Subject: ipv6: Don't depend on per socket memory for neighbour discovery messages Allocating skbs when sending out neighbour discovery messages currently uses sock_alloc_send_skb() based on a per net namespace socket and thus shares a socket wmem buffer space. If a netdevice is temporarily unable to transmit due to carrier loss or for other reasons, the queued up ndisc messages will consume all of the wmem space and will thus prevent any more skbs from being allocated, even for netdevices that are able to transmit packets. The number of neighbour discovery messages sent is very limited, so simply use alloc_skb() and don't depend on any socket wmem space any longer. This patch was originally posted by Eric Dumazet in a modified form. Signed-off-by: Thomas Graf Cc: Eric Dumazet Acked-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 04d31c2..5cb98df 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -370,16 +370,12 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, { int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; - int err; - skb = sock_alloc_send_skb(sk, - hlen + sizeof(struct ipv6hdr) + len + tlen, - 1, &err); + skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); if (!skb) { - ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n", - __func__, err); + ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", + __func__); return NULL; } -- cgit v0.10.2 From 77fa4cbd5fa389e28419bbe8ac491b5fdd54840d Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 23 Aug 2013 23:50:23 +0300 Subject: drm/i915: ivb: fix edp voltage swing reg val Fix the typo introduced in commit 1a2eb4604b85c5efb343da8a4dcf41288fcfca85 Author: Keith Packard Date: Wed Nov 16 16:26:07 2011 -0800 drm/i915: Hook up Ivybridge eDP This fixes eDP link-training failures and cases where all voltage swing/pre-emphasis levels were tried and failed during clock recovery and - as a fallback - we go on to do channel equalization with the last voltage swing/pre-emphasis level which will succeed. Both issues can lead to a blank screen.
v2: - improve commit message CC: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=64880 Tested-by: Jeremy Moles Signed-off-by: Imre Deak Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 53cddd9..342f1f3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4440,7 +4440,7 @@ #define EDP_LINK_TRAIN_600MV_0DB_IVB (0x30 <<22) #define EDP_LINK_TRAIN_600MV_3_5DB_IVB (0x36 <<22) #define EDP_LINK_TRAIN_800MV_0DB_IVB (0x38 <<22) -#define EDP_LINK_TRAIN_800MV_3_5DB_IVB (0x33 <<22) +#define EDP_LINK_TRAIN_800MV_3_5DB_IVB (0x3e <<22) /* legacy values */ #define EDP_LINK_TRAIN_500MV_0DB_IVB (0x00 <<22) -- cgit v0.10.2 From 6e4dcff3adbf25acb87e74500a58e3c07bdec40f Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 29 Aug 2013 02:32:53 +0200 Subject: drm/vmwgfx: Split GMR2_REMAP commands if they are too large This fixes the piglit test texturing/max-texture-size causing the VM to die due to a too-large SVGA command. Signed-off-by: Jakob Bornecrantz Reviewed-by: Brian Paul Reviewed-by: Zack Rusin Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c index 3751730..1a0bf07 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c @@ -29,7 +29,9 @@ #include #include -#define VMW_PPN_SIZE sizeof(unsigned long) +#define VMW_PPN_SIZE (sizeof(unsigned long)) +/* A future safe maximum remap size. */ +#define VMW_PPN_PER_REMAP ((31 * 1024) / VMW_PPN_SIZE) static int vmw_gmr2_bind(struct vmw_private *dev_priv, struct page *pages[], @@ -38,43 +40,61 @@ static int vmw_gmr2_bind(struct vmw_private *dev_priv, { SVGAFifoCmdDefineGMR2 define_cmd; SVGAFifoCmdRemapGMR2 remap_cmd; - uint32_t define_size = sizeof(define_cmd) + 4; - uint32_t remap_size = VMW_PPN_SIZE * num_pages + sizeof(remap_cmd) + 4; uint32_t *cmd; uint32_t *cmd_orig; + uint32_t define_size = sizeof(define_cmd) + sizeof(*cmd); + uint32_t remap_num = num_pages / VMW_PPN_PER_REMAP + ((num_pages % VMW_PPN_PER_REMAP) > 0); + uint32_t remap_size = VMW_PPN_SIZE * num_pages + (sizeof(remap_cmd) + sizeof(*cmd)) * remap_num; + uint32_t remap_pos = 0; + uint32_t cmd_size = define_size + remap_size; uint32_t i; - cmd_orig = cmd = vmw_fifo_reserve(dev_priv, define_size + remap_size); + cmd_orig = cmd = vmw_fifo_reserve(dev_priv, cmd_size); if (unlikely(cmd == NULL)) return -ENOMEM; define_cmd.gmrId = gmr_id; define_cmd.numPages = num_pages; + *cmd++ = SVGA_CMD_DEFINE_GMR2; + memcpy(cmd, &define_cmd, sizeof(define_cmd)); + cmd += sizeof(define_cmd) / sizeof(*cmd); + + /* + * Need to split the command if there are too many + * pages that goes into the gmr. + */ + remap_cmd.gmrId = gmr_id; remap_cmd.flags = (VMW_PPN_SIZE > sizeof(*cmd)) ?
SVGA_REMAP_GMR2_PPN64 : SVGA_REMAP_GMR2_PPN32; - remap_cmd.offsetPages = 0; - remap_cmd.numPages = num_pages; - *cmd++ = SVGA_CMD_DEFINE_GMR2; - memcpy(cmd, &define_cmd, sizeof(define_cmd)); - cmd += sizeof(define_cmd) / sizeof(uint32); + while (num_pages > 0) { + unsigned long nr = min(num_pages, (unsigned long)VMW_PPN_PER_REMAP); + + remap_cmd.offsetPages = remap_pos; + remap_cmd.numPages = nr; - *cmd++ = SVGA_CMD_REMAP_GMR2; - memcpy(cmd, &remap_cmd, sizeof(remap_cmd)); - cmd += sizeof(remap_cmd) / sizeof(uint32); + *cmd++ = SVGA_CMD_REMAP_GMR2; + memcpy(cmd, &remap_cmd, sizeof(remap_cmd)); + cmd += sizeof(remap_cmd) / sizeof(*cmd); - for (i = 0; i < num_pages; ++i) { - if (VMW_PPN_SIZE <= 4) - *cmd = page_to_pfn(*pages++); - else - *((uint64_t *)cmd) = page_to_pfn(*pages++); + for (i = 0; i < nr; ++i) { + if (VMW_PPN_SIZE <= 4) + *cmd = page_to_pfn(*pages++); + else + *((uint64_t *)cmd) = page_to_pfn(*pages++); - cmd += VMW_PPN_SIZE / sizeof(*cmd); + cmd += VMW_PPN_SIZE / sizeof(*cmd); + } + + num_pages -= nr; + remap_pos += nr; } - vmw_fifo_commit(dev_priv, define_size + remap_size); + BUG_ON(cmd != cmd_orig + cmd_size / sizeof(*cmd)); + + vmw_fifo_commit(dev_priv, cmd_size); return 0; } -- cgit v0.10.2 From cc0fdd802859eaeb00e1c87dbb655594bed2844c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Fri, 30 Aug 2013 17:28:17 +0200 Subject: bridge: separate querier and query timer into IGMP/IPv4 and MLD/IPv6 ones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we would still potentially suffer multicast packet loss if there is just either an IGMP or an MLD querier: For the former case, we would possibly drop IPv6 multicast packets, for the latter IPv4 ones. This is because we are currently assuming that if either an IGMP or MLD querier is present that the other one is present, too. This patch makes the behaviour and fix added in "bridge: disable snooping if there is no querier" (b00589af3b04) to also work if there is either just an IGMP or an MLD querier on the link: It refines the deactivation of the snooping to be protocol specific by using separate timers for the snooped IGMP and MLD queries as well as separate timers for our internal IGMP and MLD queriers. Signed-off-by: Linus Lüssing Signed-off-by: David S. 
Miller diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 69363bd..89659d4 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -71,7 +71,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) mdst = br_mdb_get(br, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && - br_multicast_querier_exists(br)) + br_multicast_querier_exists(br, eth_hdr(skb))) br_multicast_deliver(mdst, skb); else br_flood_deliver(br, skb, false); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 8c561c0..a2fd37e 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -102,7 +102,7 @@ int br_handle_frame_finish(struct sk_buff *skb) } else if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && - br_multicast_querier_exists(br)) { + br_multicast_querier_exists(br, eth_hdr(skb))) { if ((mdst && mdst->mglist) || br_multicast_is_router(br)) skb2 = skb; diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 0daae3e..6319c43 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -414,16 +414,20 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (!netif_running(br->dev) || br->multicast_disabled) return -EINVAL; - if (timer_pending(&br->multicast_querier_timer)) - return -EBUSY; - ip.proto = entry->addr.proto; - if (ip.proto == htons(ETH_P_IP)) + if (ip.proto == htons(ETH_P_IP)) { + if (timer_pending(&br->ip4_querier.timer)) + return -EBUSY; + ip.u.ip4 = entry->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) - else + } else { + if (timer_pending(&br->ip6_querier.timer)) + return -EBUSY; + ip.u.ip6 = entry->addr.u.ip6; #endif + } spin_lock_bh(&br->multicast_lock); mdb = mlock_dereference(br->mdb, br); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 08e576a..9d1d0e6 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -33,7 +33,8 @@ #include "br_private.h" -static void br_multicast_start_querier(struct net_bridge *br); +static void br_multicast_start_querier(struct net_bridge *br, + struct bridge_mcast_query *query); unsigned int br_mdb_rehash_seq; static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) @@ -755,20 +756,35 @@ static void br_multicast_local_router_expired(unsigned long data) { } -static void br_multicast_querier_expired(unsigned long data) +static void br_multicast_querier_expired(struct net_bridge *br, + struct bridge_mcast_query *query) { - struct net_bridge *br = (void *)data; - spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || br->multicast_disabled) goto out; - br_multicast_start_querier(br); + br_multicast_start_querier(br, query); out: spin_unlock(&br->multicast_lock); } +static void br_ip4_multicast_querier_expired(unsigned long data) +{ + struct net_bridge *br = (void *)data; + + br_multicast_querier_expired(br, &br->ip4_query); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_querier_expired(unsigned long data) +{ + struct net_bridge *br = (void *)data; + + br_multicast_querier_expired(br, &br->ip6_query); +} +#endif + static void __br_multicast_send_query(struct net_bridge *br, struct net_bridge_port *port, struct br_ip *ip) @@ -789,37 +805,45 @@ static void __br_multicast_send_query(struct net_bridge *br, } static void br_multicast_send_query(struct net_bridge *br, - struct net_bridge_port *port, u32 sent) + struct net_bridge_port *port, + struct bridge_mcast_query *query) { unsigned long time; struct br_ip br_group; + 
struct bridge_mcast_querier *querier = NULL; if (!netif_running(br->dev) || br->multicast_disabled || - !br->multicast_querier || - timer_pending(&br->multicast_querier_timer)) + !br->multicast_querier) return; memset(&br_group.u, 0, sizeof(br_group.u)); - br_group.proto = htons(ETH_P_IP); - __br_multicast_send_query(br, port, &br_group); - + if (port ? (query == &port->ip4_query) : + (query == &br->ip4_query)) { + querier = &br->ip4_querier; + br_group.proto = htons(ETH_P_IP); #if IS_ENABLED(CONFIG_IPV6) - br_group.proto = htons(ETH_P_IPV6); - __br_multicast_send_query(br, port, &br_group); + } else { + querier = &br->ip6_querier; + br_group.proto = htons(ETH_P_IPV6); #endif + } + + if (!querier || timer_pending(&querier->timer)) + return; + + __br_multicast_send_query(br, port, &br_group); time = jiffies; - time += sent < br->multicast_startup_query_count ? + time += query->startup_sent < br->multicast_startup_query_count ? br->multicast_startup_query_interval : br->multicast_query_interval; - mod_timer(port ? &port->multicast_query_timer : - &br->multicast_query_timer, time); + mod_timer(&query->timer, time); } -static void br_multicast_port_query_expired(unsigned long data) +static void br_multicast_port_query_expired(struct net_bridge_port *port, + struct bridge_mcast_query *query) { - struct net_bridge_port *port = (void *)data; struct net_bridge *br = port->br; spin_lock(&br->multicast_lock); @@ -827,25 +851,43 @@ static void br_multicast_port_query_expired(unsigned long data) port->state == BR_STATE_BLOCKING) goto out; - if (port->multicast_startup_queries_sent < - br->multicast_startup_query_count) - port->multicast_startup_queries_sent++; + if (query->startup_sent < br->multicast_startup_query_count) + query->startup_sent++; - br_multicast_send_query(port->br, port, - port->multicast_startup_queries_sent); + br_multicast_send_query(port->br, port, query); out: spin_unlock(&br->multicast_lock); } +static void br_ip4_multicast_port_query_expired(unsigned long data) +{ + struct net_bridge_port *port = (void *)data; + + br_multicast_port_query_expired(port, &port->ip4_query); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_port_query_expired(unsigned long data) +{ + struct net_bridge_port *port = (void *)data; + + br_multicast_port_query_expired(port, &port->ip6_query); +} +#endif + void br_multicast_add_port(struct net_bridge_port *port) { port->multicast_router = 1; setup_timer(&port->multicast_router_timer, br_multicast_router_expired, (unsigned long)port); - setup_timer(&port->multicast_query_timer, - br_multicast_port_query_expired, (unsigned long)port); + setup_timer(&port->ip4_query.timer, br_ip4_multicast_port_query_expired, + (unsigned long)port); +#if IS_ENABLED(CONFIG_IPV6) + setup_timer(&port->ip6_query.timer, br_ip6_multicast_port_query_expired, + (unsigned long)port); +#endif } void br_multicast_del_port(struct net_bridge_port *port) @@ -853,13 +895,13 @@ void br_multicast_del_port(struct net_bridge_port *port) del_timer_sync(&port->multicast_router_timer); } -static void __br_multicast_enable_port(struct net_bridge_port *port) +static void br_multicast_enable(struct bridge_mcast_query *query) { - port->multicast_startup_queries_sent = 0; + query->startup_sent = 0; - if (try_to_del_timer_sync(&port->multicast_query_timer) >= 0 || - del_timer(&port->multicast_query_timer)) - mod_timer(&port->multicast_query_timer, jiffies); + if (try_to_del_timer_sync(&query->timer) >= 0 || + del_timer(&query->timer)) + mod_timer(&query->timer, jiffies); } void 
br_multicast_enable_port(struct net_bridge_port *port) @@ -870,7 +912,10 @@ void br_multicast_enable_port(struct net_bridge_port *port) if (br->multicast_disabled || !netif_running(br->dev)) goto out; - __br_multicast_enable_port(port); + br_multicast_enable(&port->ip4_query); +#if IS_ENABLED(CONFIG_IPV6) + br_multicast_enable(&port->ip6_query); +#endif out: spin_unlock(&br->multicast_lock); @@ -889,7 +934,10 @@ void br_multicast_disable_port(struct net_bridge_port *port) if (!hlist_unhashed(&port->rlist)) hlist_del_init_rcu(&port->rlist); del_timer(&port->multicast_router_timer); - del_timer(&port->multicast_query_timer); + del_timer(&port->ip4_query.timer); +#if IS_ENABLED(CONFIG_IPV6) + del_timer(&port->ip6_query.timer); +#endif spin_unlock(&br->multicast_lock); } @@ -1014,14 +1062,15 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, } #endif -static void br_multicast_update_querier_timer(struct net_bridge *br, - unsigned long max_delay) +static void +br_multicast_update_querier_timer(struct net_bridge *br, + struct bridge_mcast_querier *querier, + unsigned long max_delay) { - if (!timer_pending(&br->multicast_querier_timer)) - br->multicast_querier_delay_time = jiffies + max_delay; + if (!timer_pending(&querier->timer)) + querier->delay_time = jiffies + max_delay; - mod_timer(&br->multicast_querier_timer, - jiffies + br->multicast_querier_interval); + mod_timer(&querier->timer, jiffies + br->multicast_querier_interval); } /* @@ -1074,12 +1123,13 @@ timer: static void br_multicast_query_received(struct net_bridge *br, struct net_bridge_port *port, + struct bridge_mcast_querier *querier, int saddr, unsigned long max_delay) { if (saddr) - br_multicast_update_querier_timer(br, max_delay); - else if (timer_pending(&br->multicast_querier_timer)) + br_multicast_update_querier_timer(br, querier, max_delay); + else if (timer_pending(&querier->timer)) return; br_multicast_mark_router(br, port); @@ -1129,7 +1179,8 @@ static int br_ip4_multicast_query(struct net_bridge *br, IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; } - br_multicast_query_received(br, port, !!iph->saddr, max_delay); + br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr, + max_delay); if (!group) goto out; @@ -1206,8 +1257,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, max_delay = mld2q->mld2q_mrc ? 
MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1; } - br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr), - max_delay); + br_multicast_query_received(br, port, &br->ip6_querier, + !ipv6_addr_any(&ip6h->saddr), max_delay); if (!group) goto out; @@ -1244,7 +1295,9 @@ out: static void br_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group) + struct br_ip *group, + struct bridge_mcast_querier *querier, + struct bridge_mcast_query *query) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; @@ -1255,7 +1308,7 @@ static void br_multicast_leave_group(struct net_bridge *br, spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || (port && port->state == BR_STATE_DISABLED) || - timer_pending(&br->multicast_querier_timer)) + timer_pending(&querier->timer)) goto out; mdb = mlock_dereference(br->mdb, br); @@ -1263,14 +1316,13 @@ static void br_multicast_leave_group(struct net_bridge *br, if (!mp) goto out; - if (br->multicast_querier && - !timer_pending(&br->multicast_querier_timer)) { + if (br->multicast_querier) { __br_multicast_send_query(br, port, &mp->addr); time = jiffies + br->multicast_last_member_count * br->multicast_last_member_interval; - mod_timer(port ? &port->multicast_query_timer : - &br->multicast_query_timer, time); + + mod_timer(&query->timer, time); for (p = mlock_dereference(mp->ports, br); p != NULL; @@ -1323,7 +1375,6 @@ static void br_multicast_leave_group(struct net_bridge *br, mod_timer(&mp->timer, time); } } - out: spin_unlock(&br->multicast_lock); } @@ -1334,6 +1385,8 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, __u16 vid) { struct br_ip br_group; + struct bridge_mcast_query *query = port ? &port->ip4_query : + &br->ip4_query; if (ipv4_is_local_multicast(group)) return; @@ -1342,7 +1395,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, br_group.proto = htons(ETH_P_IP); br_group.vid = vid; - br_multicast_leave_group(br, port, &br_group); + br_multicast_leave_group(br, port, &br_group, &br->ip4_querier, query); } #if IS_ENABLED(CONFIG_IPV6) @@ -1352,6 +1405,9 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, __u16 vid) { struct br_ip br_group; + struct bridge_mcast_query *query = port ? 
&port->ip6_query : + &br->ip6_query; + if (!ipv6_is_transient_multicast(group)) return; @@ -1360,7 +1416,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, br_group.proto = htons(ETH_P_IPV6); br_group.vid = vid; - br_multicast_leave_group(br, port, &br_group); + br_multicast_leave_group(br, port, &br_group, &br->ip6_querier, query); } #endif @@ -1622,19 +1678,32 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, return 0; } -static void br_multicast_query_expired(unsigned long data) +static void br_multicast_query_expired(struct net_bridge *br, + struct bridge_mcast_query *query) +{ + spin_lock(&br->multicast_lock); + if (query->startup_sent < br->multicast_startup_query_count) + query->startup_sent++; + + br_multicast_send_query(br, NULL, query); + spin_unlock(&br->multicast_lock); +} + +static void br_ip4_multicast_query_expired(unsigned long data) { struct net_bridge *br = (void *)data; - spin_lock(&br->multicast_lock); - if (br->multicast_startup_queries_sent < - br->multicast_startup_query_count) - br->multicast_startup_queries_sent++; + br_multicast_query_expired(br, &br->ip4_query); +} - br_multicast_send_query(br, NULL, br->multicast_startup_queries_sent); +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_query_expired(unsigned long data) +{ + struct net_bridge *br = (void *)data; - spin_unlock(&br->multicast_lock); + br_multicast_query_expired(br, &br->ip6_query); } +#endif void br_multicast_init(struct net_bridge *br) { @@ -1654,25 +1723,43 @@ void br_multicast_init(struct net_bridge *br) br->multicast_querier_interval = 255 * HZ; br->multicast_membership_interval = 260 * HZ; - br->multicast_querier_delay_time = 0; + br->ip4_querier.delay_time = 0; +#if IS_ENABLED(CONFIG_IPV6) + br->ip6_querier.delay_time = 0; +#endif spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, br_multicast_local_router_expired, 0); - setup_timer(&br->multicast_querier_timer, - br_multicast_querier_expired, (unsigned long)br); - setup_timer(&br->multicast_query_timer, br_multicast_query_expired, + setup_timer(&br->ip4_querier.timer, br_ip4_multicast_querier_expired, + (unsigned long)br); + setup_timer(&br->ip4_query.timer, br_ip4_multicast_query_expired, (unsigned long)br); +#if IS_ENABLED(CONFIG_IPV6) + setup_timer(&br->ip6_querier.timer, br_ip6_multicast_querier_expired, + (unsigned long)br); + setup_timer(&br->ip6_query.timer, br_ip6_multicast_query_expired, + (unsigned long)br); +#endif } -void br_multicast_open(struct net_bridge *br) +static void __br_multicast_open(struct net_bridge *br, + struct bridge_mcast_query *query) { - br->multicast_startup_queries_sent = 0; + query->startup_sent = 0; if (br->multicast_disabled) return; - mod_timer(&br->multicast_query_timer, jiffies); + mod_timer(&query->timer, jiffies); +} + +void br_multicast_open(struct net_bridge *br) +{ + __br_multicast_open(br, &br->ip4_query); +#if IS_ENABLED(CONFIG_IPV6) + __br_multicast_open(br, &br->ip6_query); +#endif } void br_multicast_stop(struct net_bridge *br) @@ -1684,8 +1771,12 @@ void br_multicast_stop(struct net_bridge *br) int i; del_timer_sync(&br->multicast_router_timer); - del_timer_sync(&br->multicast_querier_timer); - del_timer_sync(&br->multicast_query_timer); + del_timer_sync(&br->ip4_querier.timer); + del_timer_sync(&br->ip4_query.timer); +#if IS_ENABLED(CONFIG_IPV6) + del_timer_sync(&br->ip6_querier.timer); + del_timer_sync(&br->ip6_query.timer); +#endif spin_lock_bh(&br->multicast_lock); mdb = mlock_dereference(br->mdb, br); @@ 
-1788,18 +1879,24 @@ unlock: return err; } -static void br_multicast_start_querier(struct net_bridge *br) +static void br_multicast_start_querier(struct net_bridge *br, + struct bridge_mcast_query *query) { struct net_bridge_port *port; - br_multicast_open(br); + __br_multicast_open(br, query); list_for_each_entry(port, &br->port_list, list) { if (port->state == BR_STATE_DISABLED || port->state == BR_STATE_BLOCKING) continue; - __br_multicast_enable_port(port); + if (query == &br->ip4_query) + br_multicast_enable(&port->ip4_query); +#if IS_ENABLED(CONFIG_IPV6) + else + br_multicast_enable(&port->ip6_query); +#endif } } @@ -1834,7 +1931,10 @@ rollback: goto rollback; } - br_multicast_start_querier(br); + br_multicast_start_querier(br, &br->ip4_query); +#if IS_ENABLED(CONFIG_IPV6) + br_multicast_start_querier(br, &br->ip6_query); +#endif unlock: spin_unlock_bh(&br->multicast_lock); @@ -1857,10 +1957,18 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val) goto unlock; max_delay = br->multicast_query_response_interval; - if (!timer_pending(&br->multicast_querier_timer)) - br->multicast_querier_delay_time = jiffies + max_delay; - br_multicast_start_querier(br); + if (!timer_pending(&br->ip4_querier.timer)) + br->ip4_querier.delay_time = jiffies + max_delay; + + br_multicast_start_querier(br, &br->ip4_query); + +#if IS_ENABLED(CONFIG_IPV6) + if (!timer_pending(&br->ip6_querier.timer)) + br->ip6_querier.delay_time = jiffies + max_delay; + + br_multicast_start_querier(br, &br->ip6_query); +#endif unlock: spin_unlock_bh(&br->multicast_lock); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2f7da41..263ba90 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -66,6 +66,20 @@ struct br_ip __u16 vid; }; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +/* our own querier */ +struct bridge_mcast_query { + struct timer_list timer; + u32 startup_sent; +}; + +/* other querier */ +struct bridge_mcast_querier { + struct timer_list timer; + unsigned long delay_time; +}; +#endif + struct net_port_vlans { u16 port_idx; u16 pvid; @@ -162,10 +176,12 @@ struct net_bridge_port #define BR_FLOOD 0x00000040 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING - u32 multicast_startup_queries_sent; + struct bridge_mcast_query ip4_query; +#if IS_ENABLED(CONFIG_IPV6) + struct bridge_mcast_query ip6_query; +#endif /* IS_ENABLED(CONFIG_IPV6) */ unsigned char multicast_router; struct timer_list multicast_router_timer; - struct timer_list multicast_query_timer; struct hlist_head mglist; struct hlist_node rlist; #endif @@ -258,7 +274,6 @@ struct net_bridge u32 hash_max; u32 multicast_last_member_count; - u32 multicast_startup_queries_sent; u32 multicast_startup_query_count; unsigned long multicast_last_member_interval; @@ -267,15 +282,18 @@ struct net_bridge unsigned long multicast_query_interval; unsigned long multicast_query_response_interval; unsigned long multicast_startup_query_interval; - unsigned long multicast_querier_delay_time; spinlock_t multicast_lock; struct net_bridge_mdb_htable __rcu *mdb; struct hlist_head router_list; struct timer_list multicast_router_timer; - struct timer_list multicast_querier_timer; - struct timer_list multicast_query_timer; + struct bridge_mcast_querier ip4_querier; + struct bridge_mcast_query ip4_query; +#if IS_ENABLED(CONFIG_IPV6) + struct bridge_mcast_querier ip6_querier; + struct bridge_mcast_query ip6_query; +#endif /* IS_ENABLED(CONFIG_IPV6) */ #endif struct timer_list hello_timer; @@ -503,11 +521,27 @@ static inline bool br_multicast_is_router(struct 
net_bridge *br) timer_pending(&br->multicast_router_timer)); } -static inline bool br_multicast_querier_exists(struct net_bridge *br) +static inline bool +__br_multicast_querier_exists(struct net_bridge *br, + struct bridge_mcast_querier *querier) +{ + return time_is_before_jiffies(querier->delay_time) && + (br->multicast_querier || timer_pending(&querier->timer)); +} + +static inline bool br_multicast_querier_exists(struct net_bridge *br, + struct ethhdr *eth) { - return time_is_before_jiffies(br->multicast_querier_delay_time) && - (br->multicast_querier || - timer_pending(&br->multicast_querier_timer)); + switch (eth->h_proto) { + case (htons(ETH_P_IP)): + return __br_multicast_querier_exists(br, &br->ip4_querier); +#if IS_ENABLED(CONFIG_IPV6) + case (htons(ETH_P_IPV6)): + return __br_multicast_querier_exists(br, &br->ip6_querier); +#endif + default: + return false; + } } #else static inline int br_multicast_rcv(struct net_bridge *br, @@ -565,7 +599,8 @@ static inline bool br_multicast_is_router(struct net_bridge *br) { return 0; } -static inline bool br_multicast_querier_exists(struct net_bridge *br) +static inline bool br_multicast_querier_exists(struct net_bridge *br, + struct ethhdr *eth) { return false; } -- cgit v0.10.2 From eb8895debe1baba41fcb62c78a16f0c63c21662a Mon Sep 17 00:00:00 2001 From: Phil Oester Date: Tue, 27 Aug 2013 16:41:40 -0700 Subject: tcp: tcp_make_synack() should use sock_wmalloc In commit 90ba9b19 (tcp: tcp_make_synack() can use alloc_skb()), Eric changed the call to sock_wmalloc in tcp_make_synack to alloc_skb. In doing so, the netfilter owner match lost its ability to block the SYNACK packet on outbound listening sockets. Revert the change, restoring the owner match functionality. This closes netfilter bugzilla #847. Signed-off-by: Phil Oester Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e297299..170737a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2670,7 +2670,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, int tcp_header_size; int mss; - skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); if (unlikely(!skb)) { dst_release(dst); return NULL; -- cgit v0.10.2 From 2c8d85182348021fc0a1bed193a4be4161dc8364 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Wed, 28 Aug 2013 09:29:58 +0200 Subject: tipc: set sk_err correctly when connection fails Should a connect fail, if the publication/server is unavailable or due to some other error, a positive value will be returned and errno is never set. If the application code checks for an explicit zero return from connect (success) or a negative return (failure), it will not catch the error and subsequent send() calls will fail as shown from the strace snippet below. socket(0x1e /* PF_??? */, SOCK_SEQPACKET, 0) = 3 connect(3, {sa_family=0x1e /* AF_??? */, sa_data="\2\1\322\4\0\0\322\4\0\0\0\0\0\0"}, 16) = 111 sendto(3, "test", 4, 0, NULL, 0) = -1 EPIPE (Broken pipe) The reason for this behaviour is that TIPC wrongly inverts error codes set in sk_err. Signed-off-by: Erik Hugne Signed-off-by: David S. 
Miller diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ce8249c..6cc7ddd 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1257,7 +1257,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) /* Accept only ACK or NACK message */ if (unlikely(msg_errcode(msg))) { sock->state = SS_DISCONNECTING; - sk->sk_err = -ECONNREFUSED; + sk->sk_err = ECONNREFUSED; retval = TIPC_OK; break; } @@ -1268,7 +1268,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) res = auto_connect(sock, msg); if (res) { sock->state = SS_DISCONNECTING; - sk->sk_err = res; + sk->sk_err = -res; retval = TIPC_OK; break; } -- cgit v0.10.2 From 737e828bdbdaf2f9d7de07f20a0308ac46ce5178 Mon Sep 17 00:00:00 2001 From: Li Hongjun Date: Wed, 28 Aug 2013 11:54:50 +0200 Subject: ipv4 tunnels: fix an oops when using ipip/sit with IPsec Since commit 3d7b46cd20e3 (ip_tunnel: push generic protocol handling to ip_tunnel module.), an Oops is triggered when an xfrm policy is configured on an IPv4 over IPv4 tunnel. xfrm4_policy_check() calls __xfrm_policy_check2(), which uses skb_dst(skb). But this field is NULL because iptunnel_pull_header() calls skb_dst_drop(skb). Signed-off-by: Li Hongjun Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 51fc2a1..b3ac3c3 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -190,15 +190,14 @@ static int ipip_rcv(struct sk_buff *skb) struct ip_tunnel *tunnel; const struct iphdr *iph; - if (iptunnel_pull_header(skb, 0, tpi.proto)) - goto drop; - iph = ip_hdr(skb); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); if (tunnel) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; + if (iptunnel_pull_header(skb, 0, tpi.proto)) + goto drop; return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index fbfc5a8..21b25dd 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -645,11 +645,7 @@ static int ipip_rcv(struct sk_buff *skb) const struct iphdr *iph; struct ip_tunnel *tunnel; - if (iptunnel_pull_header(skb, 0, tpi.proto)) - goto drop; - iph = ip_hdr(skb); - tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { @@ -659,6 +655,8 @@ static int ipip_rcv(struct sk_buff *skb) if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; + if (iptunnel_pull_header(skb, 0, tpi.proto)) + goto drop; return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); } -- cgit v0.10.2 From 25ad6117e73656071b38fd19fa67ae325471c758 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 30 Aug 2013 17:39:33 -0400 Subject: Revert "ipv6: Don't depend on per socket memory for neighbour discovery messages" This reverts commit 1f324e38870cc09659cf23bc626f1b8869e201f2. It seems to cause regressions, and in particular the output path really depends upon there being a socket attached to skb->sk for checks such as sk_mc_loop(skb->sk) for example. See ip6_output_finish2(). Reported-by: Stephen Warren Reported-by: Fabio Estevam Signed-off-by: David S. 
Miller diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 5cb98df..04d31c2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -370,12 +370,16 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, { int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; + struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; + int err; - skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); + skb = sock_alloc_send_skb(sk, + hlen + sizeof(struct ipv6hdr) + len + tlen, + 1, &err); if (!skb) { - ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", - __func__); + ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n", + __func__, err); return NULL; } -- cgit v0.10.2 From 702821f4ea6f68db18aa1de7d8ed62c6ba586a64 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Aug 2013 18:10:43 -0700 Subject: net: revert 8728c544a9c ("net: dev_pick_tx() fix") commit 8728c544a9cbdc ("net: dev_pick_tx() fix") and commit b6fe83e9525a ("bonding: refine IFF_XMIT_DST_RELEASE capability") are quite incompatible : Queue selection is disabled because skb dst was dropped before entering bonding device. This causes major performance regression, mainly because TCP packets for a given flow can be sent to multiple queues. This is particularly visible when using the new FQ packet scheduler with MQ + FQ setup on the slaves. We can safely revert the first commit now that 416186fbf8c5b ("net: Split core bits of netdev_pick_tx into __netdev_pick_tx") properly caps the queue_index. Reported-by: Xi Wang Diagnosed-by: Xi Wang Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Alexander Duyck Cc: Denys Fedorysychenko Signed-off-by: David S. Miller diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index b84a1b1..d12e3a9 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -346,14 +346,9 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) if (new_index < 0) new_index = skb_tx_hash(dev, skb); - if (queue_index != new_index && sk) { - struct dst_entry *dst = - rcu_dereference_check(sk->sk_dst_cache, 1); - - if (dst && skb_dst(skb) == dst) - sk_tx_queue_set(sk, queue_index); - - } + if (queue_index != new_index && sk && + rcu_access_pointer(sk->sk_dst_cache)) + sk_tx_queue_set(sk, queue_index); queue_index = new_index; } -- cgit v0.10.2 From 0d63c27d9e879a0b54eb405636d60ab12040ca46 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 29 Aug 2013 11:47:00 +0300 Subject: mISDN: return -EINVAL on error in dsp_control_req() If skb->len is too short then we should return an error. Otherwise we read beyond the end of skb->data for several bytes. Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c index 22b720e..77025f5 100644 --- a/drivers/isdn/mISDN/dsp_core.c +++ b/drivers/isdn/mISDN/dsp_core.c @@ -288,8 +288,10 @@ dsp_control_req(struct dsp *dsp, struct mISDNhead *hh, struct sk_buff *skb) u8 *data; int len; - if (skb->len < sizeof(int)) + if (skb->len < sizeof(int)) { printk(KERN_ERR "%s: PH_CONTROL message too short\n", __func__); + return -EINVAL; + } cont = *((int *)skb->data); len = skb->len - sizeof(int); data = skb->data + sizeof(int); -- cgit v0.10.2 From 2d98c29b6fb3de44d9eaa73c09f9cf7209346383 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 Aug 2013 23:55:05 +0200 Subject: net: bridge: convert MLDv2 Query MRC into msecs_to_jiffies for max_delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While looking into MLDv1/v2 code, I noticed that bridging code does not convert it's max delay into jiffies for MLDv2 messages as we do in core IPv6' multicast code. RFC3810, 5.1.3. Maximum Response Code says: The Maximum Response Code field specifies the maximum time allowed before sending a responding Report. The actual time allowed, called the Maximum Response Delay, is represented in units of milliseconds, and is derived from the Maximum Response Code as follows: [...] As we update timers that work with jiffies, we need to convert it. Signed-off-by: Daniel Borkmann Cc: Linus Lüssing Cc: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 9d1d0e6..bbcb435 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1254,7 +1254,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, mld2q = (struct mld2_query *)icmp6_hdr(skb); if (!mld2q->mld2q_nsrcs) group = &mld2q->mld2q_mca; - max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1; + + max_delay = max(msecs_to_jiffies(MLDV2_MRC(ntohs(mld2q->mld2q_mrc))), 1UL); } br_multicast_query_received(br, port, &br->ip6_querier, -- cgit v0.10.2 From 0affdf347ffc0c3a4595661c091e8cc5f1346e92 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Fri, 30 Aug 2013 20:28:10 +0200 Subject: net: fec: fix time stamping logic after napi conversion Commit dc975382 "net: fec: add napi support to improve proformance" converted the fec driver to the napi model. However, that commit forgot to remove the call to skb_defer_rx_timestamp which is only needed in non-napi drivers. (The function napi_gro_receive eventually calls netif_receive_skb, which in turn calls skb_defer_rx_timestamp.) This patch should also be applied to the 3.9 and 3.10 kernels. Signed-off-by: Richard Cochran Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 77ea0db..c610a27 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -971,8 +971,7 @@ fec_enet_rx(struct net_device *ndev, int budget) htons(ETH_P_8021Q), vlan_tag); - if (!skb_defer_rx_timestamp(skb)) - napi_gro_receive(&fep->napi, skb); + napi_gro_receive(&fep->napi, skb); } bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data, -- cgit v0.10.2 From de80963e6108d08f337c55d9b85838b0ba2bde44 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 30 Aug 2013 09:50:42 +0100 Subject: MAINTAINERS: change my DT related maintainer address Filtering capabilities on my work email are pretty much non-existent and this has turned out to be something of a firehose... Cc: Stephen Warren Cc: Rob Herring Cc: Olof Johansson Cc: Linus Walleij Signed-off-by: Ian Campbell Acked-by: Pawel Moll Acked-by: Mark Rutland Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index 8197fbd..b140c81 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6066,7 +6066,7 @@ M: Rob Herring M: Pawel Moll M: Mark Rutland M: Stephen Warren -M: Ian Campbell +M: Ian Campbell L: devicetree@vger.kernel.org S: Maintained F: Documentation/devicetree/ -- cgit v0.10.2 From 61e76b178dbe7145e8d6afa84bb4ccea71918994 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Fri, 30 Aug 2013 11:18:45 +0200 Subject: ICMPv6: treat dest unreachable codes 5 and 6 as EACCES, not EPROTO RFC 4443 has defined two additional codes for ICMPv6 type 1 (destination unreachable) messages: 5 - Source address failed ingress/egress policy 6 - Reject route to destination Now they are treated as a protocol error and icmpv6_err_convert() converts them to EPROTO. RFC 4443 says: "Codes 5 and 6 are more informative subsets of code 1." Treat codes 5 and 6 as code 1 (EACCES). Btw, connect() returning -EPROTO confuses firefox, so that fallback to other/IPv4 addresses does not work: https://bugzilla.mozilla.org/show_bug.cgi?id=910773 Signed-off-by: Jiri Bohac Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index e0133c7..590beda 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -115,6 +115,8 @@ struct icmp6hdr { #define ICMPV6_NOT_NEIGHBOUR 2 #define ICMPV6_ADDR_UNREACH 3 #define ICMPV6_PORT_UNREACH 4 +#define ICMPV6_POLICY_FAIL 5 +#define ICMPV6_REJECT_ROUTE 6 /* * Codes for Time Exceeded diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 7cfc8d2..67ae4e0 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -940,6 +940,14 @@ static const struct icmp6_err { .err = ECONNREFUSED, .fatal = 1, }, + { /* POLICY_FAIL */ + .err = EACCES, + .fatal = 1, + }, + { /* REJECT_ROUTE */ + .err = EACCES, + .fatal = 1, + }, }; int icmpv6_err_convert(u8 type, u8 code, int *err) @@ -951,7 +959,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err) switch (type) { case ICMPV6_DEST_UNREACH: fatal = 1; - if (code <= ICMPV6_PORT_UNREACH) { + if (code < ARRAY_SIZE(tab_unreach)) { *err = tab_unreach[code].err; fatal = tab_unreach[code].fatal; } -- cgit v0.10.2 From 13c7bf0871509723a2b3c054d3d6e9e506464e71 Mon Sep 17 00:00:00 2001 From: Petr Holasek Date: Fri, 30 Aug 2013 17:02:38 +0200 Subject: ipv6: ipv6_create_tempaddr cleanup This two-liner removes the max_addresses variable, which is now unnecessary after the patch [ipv6: remove max_addresses check from ipv6_create_tempaddr].
Signed-off-by: Petr Holasek Acked-by: Hannes Frederic Sowa Acked-by: Ding Tianhong Signed-off-by: David S. Miller diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 498ea99..05c83dd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1054,7 +1054,6 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i unsigned long regen_advance; int tmp_plen; int ret = 0; - int max_addresses; u32 addr_flags; unsigned long now = jiffies; @@ -1100,7 +1099,6 @@ retry: idev->cnf.temp_prefered_lft + age - idev->cnf.max_desync_factor); tmp_plen = ifp->prefix_len; - max_addresses = idev->cnf.max_addresses; tmp_tstamp = ifp->tstamp; spin_unlock_bh(&ifp->lock); -- cgit v0.10.2 From 50ae3c22763b77d55c59fb00274d4b9800e0c857 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 30 Aug 2013 16:49:19 -0500 Subject: net: calxedaxgmac: remove NETIF_F_FRAGLIST setting The xgmac does not actually handle frag lists, so this option should not be set. It does not appear to have had any impact though. Reported-by: Lennert Buytenhek Signed-off-by: Rob Herring Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 7cb148c..71f6720 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1759,7 +1759,7 @@ static int xgmac_probe(struct platform_device *pdev) if (device_can_wakeup(priv->device)) priv->wolopts = WAKE_MAGIC; /* Magic Frame as default */ - ndev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA; + ndev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA; if (readl(priv->base + XGMAC_DMA_HW_FEATURE) & DMA_HW_FEAT_TXCOESEL) ndev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; -- cgit v0.10.2 From ef07387faf33a95e011993200902d490b605407d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 30 Aug 2013 16:49:20 -0500 Subject: net: calxedaxgmac: read correct field in xgmac_desc_get_buf_len xgmac_desc_get_buf_len appears to have a copy/paste error. flags is the wrong field to read. We should be reading buf_size field. cpu_to_le32 should also be le32_to_cpu. This never really mattered as this function is only used for DMA mapping calls which happen to be nops with coherent DMA. Signed-off-by: Rob Herring Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 71f6720..d41af68 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -421,7 +421,7 @@ static inline void desc_set_buf_len(struct xgmac_dma_desc *p, u32 buf_sz) static inline int desc_get_buf_len(struct xgmac_dma_desc *p) { - u32 len = cpu_to_le32(p->flags); + u32 len = le32_to_cpu(p->buf_size); return (len & DESC_BUFFER1_SZ_MASK) + ((len & DESC_BUFFER2_SZ_MASK) >> DESC_BUFFER2_SZ_OFFSET); } -- cgit v0.10.2 From 8746f671ef04114ab25f5a35ec6219efbdf3703e Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 30 Aug 2013 16:49:21 -0500 Subject: net: calxedaxgmac: fix race between xgmac_tx_complete and xgmac_tx_err It is possible for the xgmac_tx_complete to run concurrently with xgmac_tx_err since there are no locks. Fix this by moving the tx error handling to a workqueue so we can disable napi while we reset the transmitter. Reported-by: Andreas Herrmann Signed-off-by: Rob Herring Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index d41af68..df7e3e2 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -393,6 +393,7 @@ struct xgmac_priv { char rx_pause; char tx_pause; int wolopts; + struct work_struct tx_timeout_work; }; /* XGMAC Configuration Settings */ @@ -897,21 +898,18 @@ static void xgmac_tx_complete(struct xgmac_priv *priv) netif_wake_queue(priv->dev); } -/** - * xgmac_tx_err: - * @priv: pointer to the private device structure - * Description: it cleans the descriptors and restarts the transmission - * in case of errors. - */ -static void xgmac_tx_err(struct xgmac_priv *priv) +static void xgmac_tx_timeout_work(struct work_struct *work) { - u32 reg, value, inten; + u32 reg, value; + struct xgmac_priv *priv = + container_of(work, struct xgmac_priv, tx_timeout_work); - netif_stop_queue(priv->dev); + napi_disable(&priv->napi); - inten = readl(priv->base + XGMAC_DMA_INTR_ENA); writel(0, priv->base + XGMAC_DMA_INTR_ENA); + netif_tx_lock(priv->dev); + reg = readl(priv->base + XGMAC_DMA_CONTROL); writel(reg & ~DMA_CONTROL_ST, priv->base + XGMAC_DMA_CONTROL); do { @@ -927,9 +925,15 @@ static void xgmac_tx_err(struct xgmac_priv *priv) writel(DMA_STATUS_TU | DMA_STATUS_TPS | DMA_STATUS_NIS | DMA_STATUS_AIS, priv->base + XGMAC_DMA_STATUS); - writel(inten, priv->base + XGMAC_DMA_INTR_ENA); + netif_tx_unlock(priv->dev); netif_wake_queue(priv->dev); + + napi_enable(&priv->napi); + + /* Enable interrupts */ + writel(DMA_INTR_DEFAULT_MASK, priv->base + XGMAC_DMA_STATUS); + writel(DMA_INTR_DEFAULT_MASK, priv->base + XGMAC_DMA_INTR_ENA); } static int xgmac_hw_init(struct net_device *dev) @@ -1225,9 +1229,7 @@ static int xgmac_poll(struct napi_struct *napi, int budget) static void xgmac_tx_timeout(struct net_device *dev) { struct xgmac_priv *priv = netdev_priv(dev); - - /* Clear Tx resources and restart transmitting again */ - xgmac_tx_err(priv); + schedule_work(&priv->tx_timeout_work); } /** @@ -1366,7 +1368,6 @@ static irqreturn_t xgmac_pmt_interrupt(int irq, void *dev_id) static irqreturn_t xgmac_interrupt(int irq, void *dev_id) { u32 intr_status; - bool tx_err = false; struct net_device *dev = (struct net_device *)dev_id; struct xgmac_priv *priv = netdev_priv(dev); struct xgmac_extra_stats *x = &priv->xstats; @@ -1396,16 +1397,12 @@ static irqreturn_t xgmac_interrupt(int irq, void *dev_id) if (intr_status & DMA_STATUS_TPS) { netdev_err(priv->dev, "transmit process stopped\n"); x->tx_process_stopped++; - tx_err = true; + schedule_work(&priv->tx_timeout_work); } if (intr_status & DMA_STATUS_FBI) { netdev_err(priv->dev, "fatal bus error\n"); x->fatal_bus_error++; - tx_err = true; } - - if (tx_err) - xgmac_tx_err(priv); } /* TX/RX NORMAL interrupts */ @@ -1708,6 +1705,7 @@ static int xgmac_probe(struct platform_device *pdev) ndev->netdev_ops = &xgmac_netdev_ops; SET_ETHTOOL_OPS(ndev, &xgmac_ethtool_ops); spin_lock_init(&priv->stats_lock); + INIT_WORK(&priv->tx_timeout_work, xgmac_tx_timeout_work); priv->device = &pdev->dev; priv->dev = ndev; -- cgit v0.10.2 From 1a1d4d2f30a3c7e91bb7876df95baae363be5434 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 30 Aug 2013 16:49:22 -0500 Subject: net: calxedaxgmac: fix possible skb free before tx complete The TX completion code may have freed an skb before the entire sg list was transmitted. The DMA unmap calls for the fragments could also get skipped. Now set the skb pointer on every entry in the ring, not just the head of the sg list. 
We then use the FS (first segment) bit in the descriptors to determine skb head vs. fragment. This also fixes similar bug in xgmac_free_tx_skbufs where clean-up of a sg list that wraps at the end of the ring buffer would not get unmapped. Signed-off-by: Rob Herring Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index df7e3e2..64854ad 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -470,6 +470,11 @@ static inline int desc_get_tx_ls(struct xgmac_dma_desc *p) return le32_to_cpu(p->flags) & TXDESC_LAST_SEG; } +static inline int desc_get_tx_fs(struct xgmac_dma_desc *p) +{ + return le32_to_cpu(p->flags) & TXDESC_FIRST_SEG; +} + static inline u32 desc_get_buf_addr(struct xgmac_dma_desc *p) { return le32_to_cpu(p->buf1_addr); @@ -796,7 +801,7 @@ static void xgmac_free_rx_skbufs(struct xgmac_priv *priv) static void xgmac_free_tx_skbufs(struct xgmac_priv *priv) { - int i, f; + int i; struct xgmac_dma_desc *p; if (!priv->tx_skbuff) @@ -807,16 +812,15 @@ static void xgmac_free_tx_skbufs(struct xgmac_priv *priv) continue; p = priv->dma_tx + i; - dma_unmap_single(priv->device, desc_get_buf_addr(p), - desc_get_buf_len(p), DMA_TO_DEVICE); - - for (f = 0; f < skb_shinfo(priv->tx_skbuff[i])->nr_frags; f++) { - p = priv->dma_tx + i++; + if (desc_get_tx_fs(p)) + dma_unmap_single(priv->device, desc_get_buf_addr(p), + desc_get_buf_len(p), DMA_TO_DEVICE); + else dma_unmap_page(priv->device, desc_get_buf_addr(p), desc_get_buf_len(p), DMA_TO_DEVICE); - } - dev_kfree_skb_any(priv->tx_skbuff[i]); + if (desc_get_tx_ls(p)) + dev_kfree_skb_any(priv->tx_skbuff[i]); priv->tx_skbuff[i] = NULL; } } @@ -853,8 +857,6 @@ static void xgmac_free_dma_desc_rings(struct xgmac_priv *priv) */ static void xgmac_tx_complete(struct xgmac_priv *priv) { - int i; - while (dma_ring_cnt(priv->tx_head, priv->tx_tail, DMA_TX_RING_SZ)) { unsigned int entry = priv->tx_tail; struct sk_buff *skb = priv->tx_skbuff[entry]; @@ -864,33 +866,24 @@ static void xgmac_tx_complete(struct xgmac_priv *priv) if (desc_get_owner(p)) break; - /* Verify tx error by looking at the last segment */ - if (desc_get_tx_ls(p)) - desc_get_tx_status(priv, p); - netdev_dbg(priv->dev, "tx ring: curr %d, dirty %d\n", priv->tx_head, priv->tx_tail); - dma_unmap_single(priv->device, desc_get_buf_addr(p), - desc_get_buf_len(p), DMA_TO_DEVICE); - - priv->tx_skbuff[entry] = NULL; - priv->tx_tail = dma_ring_incr(entry, DMA_TX_RING_SZ); - - if (!skb) { - continue; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - entry = priv->tx_tail = dma_ring_incr(priv->tx_tail, - DMA_TX_RING_SZ); - p = priv->dma_tx + priv->tx_tail; - + if (desc_get_tx_fs(p)) + dma_unmap_single(priv->device, desc_get_buf_addr(p), + desc_get_buf_len(p), DMA_TO_DEVICE); + else dma_unmap_page(priv->device, desc_get_buf_addr(p), desc_get_buf_len(p), DMA_TO_DEVICE); + + /* Check tx error on the last segment */ + if (desc_get_tx_ls(p)) { + desc_get_tx_status(priv, p); + dev_kfree_skb(skb); } - dev_kfree_skb(skb); + priv->tx_skbuff[entry] = NULL; + priv->tx_tail = dma_ring_incr(entry, DMA_TX_RING_SZ); } if (dma_ring_space(priv->tx_head, priv->tx_tail, DMA_TX_RING_SZ) > @@ -1110,7 +1103,7 @@ static netdev_tx_t xgmac_xmit(struct sk_buff *skb, struct net_device *dev) entry = dma_ring_incr(entry, DMA_TX_RING_SZ); desc = priv->dma_tx + entry; - priv->tx_skbuff[entry] = NULL; + priv->tx_skbuff[entry] = skb; desc_set_buf_addr_and_size(desc, paddr, len); if (i < (nfrags - 1)) -- cgit v0.10.2 From 
ca32723afedd65d612029705446bf44bde5eab14 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 30 Aug 2013 16:49:23 -0500 Subject: net: calxedaxgmac: update ring buffer tx_head after barriers Ensure that the descriptor writes are visible before the ring buffer head is updated. Since writel is a barrier, we can simply update the head after the writel. Reported-by: Lennert Buytenhek Signed-off-by: Rob Herring Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 64854ad..f630855 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1121,9 +1121,10 @@ static netdev_tx_t xgmac_xmit(struct sk_buff *skb, struct net_device *dev) wmb(); desc_set_tx_owner(first, desc_flags | TXDESC_FIRST_SEG); + writel(1, priv->base + XGMAC_DMA_TX_POLL); + priv->tx_head = dma_ring_incr(entry, DMA_TX_RING_SZ); - writel(1, priv->base + XGMAC_DMA_TX_POLL); if (dma_ring_space(priv->tx_head, priv->tx_tail, DMA_TX_RING_SZ) < MAX_SKB_FRAGS) netif_stop_queue(dev); -- cgit v0.10.2 From cbe157b60c8e70d4b4dc937dfdd39525d8f47b46 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 30 Aug 2013 16:49:24 -0500 Subject: net: calxedaxgmac: fix race with tx queue stop/wake Since the xgmac transmit start and completion work locklessly, it is possible for xgmac_xmit to stop the tx queue after the xgmac_tx_complete has run resulting in the tx queue never being woken up. Fix this by ensuring that ring buffer index updates are visible and recheck the ring space after stopping the queue. Also fix an off-by-one bug where we need to stop the queue when the ring buffer space is equal to MAX_SKB_FRAGS. The implementation used here was copied from drivers/net/ethernet/broadcom/tg3.c. Signed-off-by: Rob Herring Reviewed-by: Ben Hutchings Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index f630855..5d0b61a 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -410,6 +410,9 @@ struct xgmac_priv { #define dma_ring_space(h, t, s) CIRC_SPACE(h, t, s) #define dma_ring_cnt(h, t, s) CIRC_CNT(h, t, s) +#define tx_dma_ring_space(p) \ + dma_ring_space((p)->tx_head, (p)->tx_tail, DMA_TX_RING_SZ) + /* XGMAC Descriptor Access Helpers */ static inline void desc_set_buf_len(struct xgmac_dma_desc *p, u32 buf_sz) { @@ -886,8 +889,10 @@ static void xgmac_tx_complete(struct xgmac_priv *priv) priv->tx_tail = dma_ring_incr(entry, DMA_TX_RING_SZ); } - if (dma_ring_space(priv->tx_head, priv->tx_tail, DMA_TX_RING_SZ) > - MAX_SKB_FRAGS) + /* Ensure tx_tail is visible to xgmac_xmit */ + smp_mb(); + if (unlikely(netif_queue_stopped(priv->dev) && + (tx_dma_ring_space(priv) > MAX_SKB_FRAGS))) netif_wake_queue(priv->dev); } @@ -1125,10 +1130,15 @@ static netdev_tx_t xgmac_xmit(struct sk_buff *skb, struct net_device *dev) priv->tx_head = dma_ring_incr(entry, DMA_TX_RING_SZ); - if (dma_ring_space(priv->tx_head, priv->tx_tail, DMA_TX_RING_SZ) < - MAX_SKB_FRAGS) + /* Ensure tx_head update is visible to tx completion */ + smp_mb(); + if (unlikely(tx_dma_ring_space(priv) <= MAX_SKB_FRAGS)) { netif_stop_queue(dev); - + /* Ensure netif_stop_queue is visible to tx completion */ + smp_mb(); + if (tx_dma_ring_space(priv) > MAX_SKB_FRAGS) + netif_start_queue(dev); + } return NETDEV_TX_OK; } -- cgit v0.10.2 From f7ea10520d7dacbc416d130c4b10505c66bf4c36 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 30 Aug 2013 16:49:25 -0500 Subject: net: calxedaxgmac: enable interrupts after napi_enable Fix a race condition where the interrupt handler may have called napi_schedule before napi_enable is called. This would disable interrupts and never actually schedule napi to run. Reported-by: Lennert Buytenhek Signed-off-by: Rob Herring Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 5d0b61a..73d3f83 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -959,9 +959,7 @@ static int xgmac_hw_init(struct net_device *dev) DMA_BUS_MODE_FB | DMA_BUS_MODE_ATDS | DMA_BUS_MODE_AAL; writel(value, ioaddr + XGMAC_DMA_BUS_MODE); - /* Enable interrupts */ - writel(DMA_INTR_DEFAULT_MASK, ioaddr + XGMAC_DMA_STATUS); - writel(DMA_INTR_DEFAULT_MASK, ioaddr + XGMAC_DMA_INTR_ENA); + writel(0, ioaddr + XGMAC_DMA_INTR_ENA); /* Mask power mgt interrupt */ writel(XGMAC_INT_STAT_PMTIM, ioaddr + XGMAC_INT_STAT); @@ -1029,6 +1027,10 @@ static int xgmac_open(struct net_device *dev) napi_enable(&priv->napi); netif_start_queue(dev); + /* Enable interrupts */ + writel(DMA_INTR_DEFAULT_MASK, ioaddr + XGMAC_DMA_STATUS); + writel(DMA_INTR_DEFAULT_MASK, ioaddr + XGMAC_DMA_INTR_ENA); + return 0; } -- cgit v0.10.2 From 2ee68f621af280c37a6bd72fe75e47de0750f301 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 30 Aug 2013 16:49:26 -0500 Subject: net: calxedaxgmac: fix various errors in xgmac_set_rx_mode Fix xgmac_set_rx_mode to handle several conditions that were not handled correctly as Lennert Buytenhek describes: If we have, say, 100 unicast addresses, and 5 multicast addresses, the unicast address count check will evaluate to true, and set use_hash to true. 
The multicast address check will however evaluate to false, and use_hash won't be set to true again, and XGMAC_FRAME_FILTER_HMC won't be OR'd into XGMAC_FRAME_FILTER, but since use_hash was still true from the unicast check, netdev_for_each_mc_addr() will program the multicast addresses into the hash table instead of using the MAC address registers, but since the HMC bit wasn't set, the hash table won't be checked for multicast addresses on receive, and we'll stop receiving multicast packets entirely. Also, there is no code that zeroes out MAC address registers reg..31 at the end of this function, meaning that under the right conditions, unicast/multicast addresses that were previously in the filter but were then deleted won't be cleared out. Reported-by: Lennert Buytenhek Signed-off-by: Rob Herring Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 73d3f83..0cff9e3 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -618,10 +618,15 @@ static void xgmac_set_mac_addr(void __iomem *ioaddr, unsigned char *addr, { u32 data; - data = (addr[5] << 8) | addr[4] | (num ? XGMAC_ADDR_AE : 0); - writel(data, ioaddr + XGMAC_ADDR_HIGH(num)); - data = (addr[3] << 24) | (addr[2] << 16) | (addr[1] << 8) | addr[0]; - writel(data, ioaddr + XGMAC_ADDR_LOW(num)); + if (addr) { + data = (addr[5] << 8) | addr[4] | (num ? XGMAC_ADDR_AE : 0); + writel(data, ioaddr + XGMAC_ADDR_HIGH(num)); + data = (addr[3] << 24) | (addr[2] << 16) | (addr[1] << 8) | addr[0]; + writel(data, ioaddr + XGMAC_ADDR_LOW(num)); + } else { + writel(0, ioaddr + XGMAC_ADDR_HIGH(num)); + writel(0, ioaddr + XGMAC_ADDR_LOW(num)); + } } static void xgmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, @@ -1294,6 +1299,8 @@ static void xgmac_set_rx_mode(struct net_device *dev) if ((netdev_mc_count(dev) + reg - 1) > XGMAC_MAX_FILTER_ADDR) { use_hash = true; value |= XGMAC_FRAME_FILTER_HMC | XGMAC_FRAME_FILTER_HPF; + } else { + use_hash = false; } netdev_for_each_mc_addr(ha, dev) { if (use_hash) { @@ -1310,6 +1317,8 @@ static void xgmac_set_rx_mode(struct net_device *dev) } out: + for (i = reg; i < XGMAC_MAX_FILTER_ADDR; i++) + xgmac_set_mac_addr(ioaddr, NULL, reg); for (i = 0; i < XGMAC_NUM_HASH; i++) writel(hash_filter[i], ioaddr + XGMAC_HASH(i)); -- cgit v0.10.2 From d2a5128fbc30480763bbb8a43ec7c8161c1bf055 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 30 Aug 2013 16:49:27 -0500 Subject: net: calxedaxgmac: remove some unused statistic counters rx_sa_filter_fail and tx_undeflow events are unused and impossible to occur based on how the h/w is used. We never filter on source MAC address and TX store and forward mode prevents underflow events. Reported-by: Lennert Buytenhek Signed-off-by: Rob Herring Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 0cff9e3..04c585c 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -353,11 +353,9 @@ struct xgmac_extra_stats { /* Receive errors */ unsigned long rx_watchdog; unsigned long rx_da_filter_fail; - unsigned long rx_sa_filter_fail; unsigned long rx_payload_error; unsigned long rx_ip_header_error; /* Tx/Rx IRQ errors */ - unsigned long tx_undeflow; unsigned long tx_process_stopped; unsigned long rx_buf_unav; unsigned long rx_process_stopped; @@ -1581,7 +1579,6 @@ static const struct xgmac_stats xgmac_gstrings_stats[] = { XGMAC_STAT(rx_payload_error), XGMAC_STAT(rx_ip_header_error), XGMAC_STAT(rx_da_filter_fail), - XGMAC_STAT(rx_sa_filter_fail), XGMAC_STAT(fatal_bus_error), XGMAC_HW_STAT(rx_watchdog, XGMAC_MMC_RXWATCHDOG), XGMAC_HW_STAT(tx_vlan, XGMAC_MMC_TXVLANFRAME), -- cgit v0.10.2 From 531cda20cd44d5a7a77debaaeaa407d4802d7e05 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 30 Aug 2013 16:49:28 -0500 Subject: net: calxedaxgmac: fix rx DMA mapping API size mismatches Fix the mismatch in the DMA mapping and unmapping sizes for receive. The unmap size must be equal to the map size and should not be the actual received frame length. The map size should also be adjusted by the NET_IP_ALIGN size since the h/w buffer size (dma_buf_sz) includes this offset. Also, add a missing dma_mapping_error check in xgmac_rx_refill. Reported-by: Lennert Buytenhek Signed-off-by: Rob Herring Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 04c585c..cd5010b 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -695,9 +695,14 @@ static void xgmac_rx_refill(struct xgmac_priv *priv) if (unlikely(skb == NULL)) break; - priv->rx_skbuff[entry] = skb; paddr = dma_map_single(priv->device, skb->data, - bufsz, DMA_FROM_DEVICE); + priv->dma_buf_sz - NET_IP_ALIGN, + DMA_FROM_DEVICE); + if (dma_mapping_error(priv->device, paddr)) { + dev_kfree_skb_any(skb); + break; + } + priv->rx_skbuff[entry] = skb; desc_set_buf_addr(p, paddr, priv->dma_buf_sz); } @@ -794,13 +799,14 @@ static void xgmac_free_rx_skbufs(struct xgmac_priv *priv) return; for (i = 0; i < DMA_RX_RING_SZ; i++) { - if (priv->rx_skbuff[i] == NULL) + struct sk_buff *skb = priv->rx_skbuff[i]; + if (skb == NULL) continue; p = priv->dma_rx + i; dma_unmap_single(priv->device, desc_get_buf_addr(p), - priv->dma_buf_sz, DMA_FROM_DEVICE); - dev_kfree_skb_any(priv->rx_skbuff[i]); + priv->dma_buf_sz - NET_IP_ALIGN, DMA_FROM_DEVICE); + dev_kfree_skb_any(skb); priv->rx_skbuff[i] = NULL; } } @@ -1187,7 +1193,7 @@ static int xgmac_rx(struct xgmac_priv *priv, int limit) skb_put(skb, frame_len); dma_unmap_single(priv->device, desc_get_buf_addr(p), - frame_len, DMA_FROM_DEVICE); + priv->dma_buf_sz - NET_IP_ALIGN, DMA_FROM_DEVICE); skb->protocol = eth_type_trans(skb, priv->dev); skb->ip_summed = ip_checksum; -- cgit v0.10.2 From 92cd4253e553e87a715243a36623945edcbf1ec7 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 30 Aug 2013 16:49:29 -0500 Subject: net: calxedaxgmac: fix xgmac_xmit DMA mapping error handling On a DMA mapping error in xgmac_xmit, we should simply free the skb and return NETDEV_TX_OK rather than -EIO. In the case of errors in mapping frags, we need to undo everything that has been setup. Reported-by: Andreas Herrmann Signed-off-by: Rob Herring Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index cd5010b..78d6d6b 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -466,6 +466,13 @@ static inline void desc_set_tx_owner(struct xgmac_dma_desc *p, u32 flags) p->flags = cpu_to_le32(tmpflags); } +static inline void desc_clear_tx_owner(struct xgmac_dma_desc *p) +{ + u32 tmpflags = le32_to_cpu(p->flags); + tmpflags &= TXDESC_END_RING; + p->flags = cpu_to_le32(tmpflags); +} + static inline int desc_get_tx_ls(struct xgmac_dma_desc *p) { return le32_to_cpu(p->flags) & TXDESC_LAST_SEG; @@ -1100,7 +1107,7 @@ static netdev_tx_t xgmac_xmit(struct sk_buff *skb, struct net_device *dev) paddr = dma_map_single(priv->device, skb->data, len, DMA_TO_DEVICE); if (dma_mapping_error(priv->device, paddr)) { dev_kfree_skb(skb); - return -EIO; + return NETDEV_TX_OK; } priv->tx_skbuff[entry] = skb; desc_set_buf_addr_and_size(desc, paddr, len); @@ -1112,10 +1119,8 @@ static netdev_tx_t xgmac_xmit(struct sk_buff *skb, struct net_device *dev) paddr = skb_frag_dma_map(priv->device, frag, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(priv->device, paddr)) { - dev_kfree_skb(skb); - return -EIO; - } + if (dma_mapping_error(priv->device, paddr)) + goto dma_err; entry = dma_ring_incr(entry, DMA_TX_RING_SZ); desc = priv->dma_tx + entry; @@ -1151,6 +1156,22 @@ static netdev_tx_t xgmac_xmit(struct sk_buff *skb, struct net_device *dev) netif_start_queue(dev); } return NETDEV_TX_OK; + +dma_err: + entry = priv->tx_head; + for ( ; i > 0; i--) { + entry = dma_ring_incr(entry, DMA_TX_RING_SZ); + desc = priv->dma_tx + entry; + priv->tx_skbuff[entry] = NULL; + dma_unmap_page(priv->device, desc_get_buf_addr(desc), + desc_get_buf_len(desc), DMA_TO_DEVICE); + desc_clear_tx_owner(desc); + } + desc = first; + dma_unmap_single(priv->device, desc_get_buf_addr(desc), + desc_get_buf_len(desc), DMA_TO_DEVICE); + dev_kfree_skb(skb); + return NETDEV_TX_OK; } static int xgmac_rx(struct xgmac_priv *priv, int limit) -- cgit v0.10.2 From 989038e217e94161862a959e82f9a1ecf8dda152 Mon Sep 17 00:00:00 2001 From: Nithin Sujir Date: Fri, 30 Aug 2013 17:01:36 -0700 Subject: tg3: Don't turn off led on 5719 serdes port 0 Turning off led on port 0 of the 5719 serdes causes all other ports to lose power and stop functioning. Add tg3_phy_led_bug() function to check for this condition. We use a switch() in tg3_phy_led_bug() for consistency with the tg3_phy_power_bug() function. Signed-off-by: Nithin Nayak Sujir Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 0da2214..9d289d3 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -3030,6 +3030,19 @@ static bool tg3_phy_power_bug(struct tg3 *tp) return false; } +static bool tg3_phy_led_bug(struct tg3 *tp) +{ + switch (tg3_asic_rev(tp)) { + case ASIC_REV_5719: + if ((tp->phy_flags & TG3_PHYFLG_MII_SERDES) && + !tp->pci_fn) + return true; + return false; + } + + return false; +} + static void tg3_power_down_phy(struct tg3 *tp, bool do_low_power) { u32 val; @@ -3077,8 +3090,9 @@ static void tg3_power_down_phy(struct tg3 *tp, bool do_low_power) } return; } else if (do_low_power) { - tg3_writephy(tp, MII_TG3_EXT_CTRL, - MII_TG3_EXT_CTRL_FORCE_LED_OFF); + if (!tg3_phy_led_bug(tp)) + tg3_writephy(tp, MII_TG3_EXT_CTRL, + MII_TG3_EXT_CTRL_FORCE_LED_OFF); val = MII_TG3_AUXCTL_PCTL_100TX_LPWR | MII_TG3_AUXCTL_PCTL_SPR_ISOLATE | -- cgit v0.10.2 From 061ba049abc604e5690198bfa2c47c71c2ba725c Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 2 Sep 2013 10:20:02 +0800 Subject: drivers: net: ethernet: 8390: Kconfig: add H8300H_AKI3068NET and H8300H_H8MAX dependancy for NE_H8300 Currently only H8300H_AKI3068NET and H8300H_H8MAX define default I/O base and IRQ values for the NE_H8300 driver. Hence builds for other H8300H platforms will fail as per below. Since H8300H does not support multi platform builds, we simply limit building the driver to those two platforms specifically. The release error: drivers/net/ethernet/8390/ne-h8300.c: In function 'init_dev': drivers/net/ethernet/8390/ne-h8300.c:117:23: error: 'h8300_ne_base' undeclared (first use in this function) drivers/net/ethernet/8390/ne-h8300.c:117:23: note: each undeclared identifier is reported only once for each function it appears in drivers/net/ethernet/8390/ne-h8300.c:117:23: error: bit-field '' width not an integer constant drivers/net/ethernet/8390/ne-h8300.c:119:20: error: 'h8300_ne_irq' undeclared (first use in this function) drivers/net/ethernet/8390/ne-h8300.c: In function 'init_module': drivers/net/ethernet/8390/ne-h8300.c:647:21: error: 'h8300_ne_base' undeclared (first use in this function) drivers/net/ethernet/8390/ne-h8300.c:648:15: error: 'h8300_ne_irq' undeclared (first use in this function) drivers/net/ethernet/8390/ne-h8300.c:661:4: warning: format '%x' expects argument of type 'unsigned int', but argument 2 has type 'long unsigned int' [-Wformat] Signed-off-by: Chen Gang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig index a5f91e1..becef25 100644 --- a/drivers/net/ethernet/8390/Kconfig +++ b/drivers/net/ethernet/8390/Kconfig @@ -148,7 +148,7 @@ config PCMCIA_PCNET config NE_H8300 tristate "NE2000 compatible support for H8/300" - depends on H8300 + depends on H8300H_AKI3068NET || H8300H_H8MAX ---help--- Say Y here if you want to use the NE2000 compatible controller on the Renesas H8/300 processor. -- cgit v0.10.2 From a8e9fd0f7462f5cada5189513d12fe6c9cce2105 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 3 Sep 2013 03:03:10 +0400 Subject: sh_eth: NAPI requires netif_receive_skb() Driver supporting NAPI should use NAPI-specific function for receiving packets, so netif_rx() should be changed to netif_receive_skb(). Signed-off-by: Sergei Shtylyov Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index a753928..1ccd595 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1299,7 +1299,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) skb_reserve(skb, NET_IP_ALIGN); skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, ndev); - netif_rx(skb); + netif_receive_skb(skb); ndev->stats.rx_packets++; ndev->stats.rx_bytes += pkt_len; } -- cgit v0.10.2 From 639739b5e609a5074839bb22fc061b37baa06269 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 3 Sep 2013 02:13:31 +0200 Subject: ipv6: fix null pointer dereference in __ip6addrlbl_add Commit b67bfe0d42cac56c512dd5da4b1b347a23f4b70a ("hlist: drop the node parameter from iterators") changed the behavior of hlist_for_each_entry_safe to leave the p argument NULL. Fix this up by tracking the last argument. Reported-by: Michele Baldessari Cc: Hideaki YOSHIFUJI Cc: Sasha Levin Signed-off-by: Hannes Frederic Sowa Tested-by: Michele Baldessari Signed-off-by: David S. Miller diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index f083a58..b30ad37 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -251,38 +251,36 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, /* add a label */ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) { + struct hlist_node *n; + struct ip6addrlbl_entry *last = NULL, *p = NULL; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", - __func__, - newp, replace); + ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, + replace); - if (hlist_empty(&ip6addrlbl_table.head)) { - hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); - } else { - struct hlist_node *n; - struct ip6addrlbl_entry *p = NULL; - hlist_for_each_entry_safe(p, n, - &ip6addrlbl_table.head, list) { - if (p->prefixlen == newp->prefixlen && - net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && - p->ifindex == newp->ifindex && - ipv6_addr_equal(&p->prefix, &newp->prefix)) { - if (!replace) { - ret = -EEXIST; - goto out; - } - hlist_replace_rcu(&p->list, &newp->list); - ip6addrlbl_put(p); - goto out; - } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || - (p->prefixlen < newp->prefixlen)) { - hlist_add_before_rcu(&newp->list, &p->list); + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { + if (p->prefixlen == newp->prefixlen && + net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && + p->ifindex == newp->ifindex && + ipv6_addr_equal(&p->prefix, &newp->prefix)) { + if (!replace) { + ret = -EEXIST; goto out; } + hlist_replace_rcu(&p->list, &newp->list); + ip6addrlbl_put(p); + goto out; + } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || + (p->prefixlen < newp->prefixlen)) { + hlist_add_before_rcu(&newp->list, &p->list); + goto out; } - hlist_add_after_rcu(&p->list, &newp->list); + last = p; } + if (last) + hlist_add_after_rcu(&last->list, &newp->list); + else + hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); out: if (!ret) ip6addrlbl_table.seq++; -- cgit v0.10.2 From 36e24e2ee29e25480b4be4a9ea467f4739be22fb Mon Sep 17 00:00:00 2001 From: Duan Fugang-B38611 Date: Tue, 3 Sep 2013 10:41:18 +0800 Subject: net: fec: fix the error to get the previous BD entry Bug: error to get the previous BD entry. When the current BD is the first BD, the previous BD entry must be the last BD, not "bdp - 1" in current logic. 
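To make the wrap-around requirement concrete before the patch's changelog and diff below: a small standalone C model of next/previous lookups in a descriptor ring. This is illustration only, not the driver's fec_enet_get_nextdesc()/fec_enet_get_prevdesc(); the ring size, array name, and helpers here are made up for the sketch.

#include <stdio.h>

#define RING_SIZE 8

struct bufdesc { int dummy; };

static struct bufdesc ring[RING_SIZE];

/* Next descriptor: wrap from the last entry back to the first. */
static struct bufdesc *ring_next(struct bufdesc *bdp)
{
	return (bdp + 1 >= ring + RING_SIZE) ? ring : bdp + 1;
}

/* Previous descriptor: wrap from the first entry back to the last.
 * This is exactly the case a plain "bdp - 1" gets wrong. */
static struct bufdesc *ring_prev(struct bufdesc *bdp)
{
	return (bdp == ring) ? ring + RING_SIZE - 1 : bdp - 1;
}

int main(void)
{
	printf("prev of entry 0 is entry %ld\n",
	       (long)(ring_prev(&ring[0]) - ring));
	printf("next of entry %d is entry %ld\n", RING_SIZE - 1,
	       (long)(ring_next(&ring[RING_SIZE - 1]) - ring));
	return 0;
}

Compiled and run, this prints "prev of entry 0 is entry 7" and "next of entry 7 is entry 0", which is the behaviour the patch below restores for the FEC buffer descriptor rings.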
V4: * Optimize fec_enet_get_nextdesc() for code clean. Replace "ex_new_bd - ring_size" with "ex_base". Replace "new_bd - ring_size" with "base". V3: * Restore the API name because David suggest to use fec_enet_ prefix for all function in fec driver. So, change next_bd() -> fec_enet_get_nextdesc() change pre_bd() -> fec_enet_get_prevdesc() * Reduce the two APIs parameters for easy to call. V2: * Add tx_ring_size and rx_ring_size to struct fec_enet_private. * Replace api fec_enet_get_nextdesc() with next_bd(). Replace api fec_enet_get_prevdesc() with pre_bd(). * Move all ring size check logic to next_bd() and pre_bd(), which simplifies the code redundancy. V1: * Add BD ring size check to get the previous BD entry in correctly. Reviewed-by: Li Frank Signed-off-by: Fugang Duan Acked-by: Frank Li Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index ae23600..0120217 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -296,6 +296,9 @@ struct fec_enet_private { /* The ring entries to be free()ed */ struct bufdesc *dirty_tx; + unsigned short tx_ring_size; + unsigned short rx_ring_size; + struct platform_device *pdev; int opened; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index c610a27..fd90bf5 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -239,22 +239,57 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); static int mii_cnt; -static struct bufdesc *fec_enet_get_nextdesc(struct bufdesc *bdp, int is_ex) +static inline +struct bufdesc *fec_enet_get_nextdesc(struct bufdesc *bdp, struct fec_enet_private *fep) { - struct bufdesc_ex *ex = (struct bufdesc_ex *)bdp; - if (is_ex) - return (struct bufdesc *)(ex + 1); + struct bufdesc *new_bd = bdp + 1; + struct bufdesc_ex *ex_new_bd = (struct bufdesc_ex *)bdp + 1; + struct bufdesc_ex *ex_base; + struct bufdesc *base; + int ring_size; + + if (bdp >= fep->tx_bd_base) { + base = fep->tx_bd_base; + ring_size = fep->tx_ring_size; + ex_base = (struct bufdesc_ex *)fep->tx_bd_base; + } else { + base = fep->rx_bd_base; + ring_size = fep->rx_ring_size; + ex_base = (struct bufdesc_ex *)fep->rx_bd_base; + } + + if (fep->bufdesc_ex) + return (struct bufdesc *)((ex_new_bd >= (ex_base + ring_size)) ? + ex_base : ex_new_bd); else - return bdp + 1; + return (new_bd >= (base + ring_size)) ? + base : new_bd; } -static struct bufdesc *fec_enet_get_prevdesc(struct bufdesc *bdp, int is_ex) +static inline +struct bufdesc *fec_enet_get_prevdesc(struct bufdesc *bdp, struct fec_enet_private *fep) { - struct bufdesc_ex *ex = (struct bufdesc_ex *)bdp; - if (is_ex) - return (struct bufdesc *)(ex - 1); + struct bufdesc *new_bd = bdp - 1; + struct bufdesc_ex *ex_new_bd = (struct bufdesc_ex *)bdp - 1; + struct bufdesc_ex *ex_base; + struct bufdesc *base; + int ring_size; + + if (bdp >= fep->tx_bd_base) { + base = fep->tx_bd_base; + ring_size = fep->tx_ring_size; + ex_base = (struct bufdesc_ex *)fep->tx_bd_base; + } else { + base = fep->rx_bd_base; + ring_size = fep->rx_ring_size; + ex_base = (struct bufdesc_ex *)fep->rx_bd_base; + } + + if (fep->bufdesc_ex) + return (struct bufdesc *)((ex_new_bd < ex_base) ? + (ex_new_bd + ring_size) : ex_new_bd); else - return bdp - 1; + return (new_bd < base) ? 
(new_bd + ring_size) : new_bd; } static void *swap_buffer(void *bufaddr, int len) @@ -380,7 +415,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) } } - bdp_pre = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex); + bdp_pre = fec_enet_get_prevdesc(bdp, fep); if ((id_entry->driver_data & FEC_QUIRK_ERR006358) && !(bdp_pre->cbd_sc & BD_ENET_TX_READY)) { fep->delay_work.trig_tx = true; @@ -389,10 +424,7 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) } /* If this was the last BD in the ring, start at the beginning again. */ - if (status & BD_ENET_TX_WRAP) - bdp = fep->tx_bd_base; - else - bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + bdp = fec_enet_get_nextdesc(bdp, fep); fep->cur_tx = bdp; @@ -417,18 +449,18 @@ static void fec_enet_bd_init(struct net_device *dev) /* Initialize the receive buffer descriptors. */ bdp = fep->rx_bd_base; - for (i = 0; i < RX_RING_SIZE; i++) { + for (i = 0; i < fep->rx_ring_size; i++) { /* Initialize the BD for every fragment in the page. */ if (bdp->cbd_bufaddr) bdp->cbd_sc = BD_ENET_RX_EMPTY; else bdp->cbd_sc = 0; - bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + bdp = fec_enet_get_nextdesc(bdp, fep); } /* Set the last buffer to wrap */ - bdp = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex); + bdp = fec_enet_get_prevdesc(bdp, fep); bdp->cbd_sc |= BD_SC_WRAP; fep->cur_rx = fep->rx_bd_base; @@ -436,7 +468,7 @@ static void fec_enet_bd_init(struct net_device *dev) /* ...and the same for transmit */ bdp = fep->tx_bd_base; fep->cur_tx = bdp; - for (i = 0; i < TX_RING_SIZE; i++) { + for (i = 0; i < fep->tx_ring_size; i++) { /* Initialize the BD for every fragment in the page. */ bdp->cbd_sc = 0; @@ -445,11 +477,11 @@ static void fec_enet_bd_init(struct net_device *dev) fep->tx_skbuff[i] = NULL; } bdp->cbd_bufaddr = 0; - bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + bdp = fec_enet_get_nextdesc(bdp, fep); } /* Set the last buffer to wrap */ - bdp = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex); + bdp = fec_enet_get_prevdesc(bdp, fep); bdp->cbd_sc |= BD_SC_WRAP; fep->dirty_tx = bdp; } @@ -510,10 +542,10 @@ fec_restart(struct net_device *ndev, int duplex) writel(fep->bd_dma, fep->hwp + FEC_R_DES_START); if (fep->bufdesc_ex) writel((unsigned long)fep->bd_dma + sizeof(struct bufdesc_ex) - * RX_RING_SIZE, fep->hwp + FEC_X_DES_START); + * fep->rx_ring_size, fep->hwp + FEC_X_DES_START); else writel((unsigned long)fep->bd_dma + sizeof(struct bufdesc) - * RX_RING_SIZE, fep->hwp + FEC_X_DES_START); + * fep->rx_ring_size, fep->hwp + FEC_X_DES_START); for (i = 0; i <= TX_RING_MOD_MASK; i++) { @@ -727,10 +759,7 @@ fec_enet_tx(struct net_device *ndev) bdp = fep->dirty_tx; /* get next bdp of dirty_tx */ - if (bdp->cbd_sc & BD_ENET_TX_WRAP) - bdp = fep->tx_bd_base; - else - bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + bdp = fec_enet_get_nextdesc(bdp, fep); while (((status = bdp->cbd_sc) & BD_ENET_TX_READY) == 0) { @@ -800,10 +829,7 @@ fec_enet_tx(struct net_device *ndev) fep->dirty_tx = bdp; /* Update pointer to next buffer descriptor to be transmitted */ - if (status & BD_ENET_TX_WRAP) - bdp = fep->tx_bd_base; - else - bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + bdp = fec_enet_get_nextdesc(bdp, fep); /* Since we have freed up a buffer, the ring is no longer full */ @@ -993,10 +1019,8 @@ rx_processing_done: } /* Update BD pointer to next entry */ - if (status & BD_ENET_RX_WRAP) - bdp = fep->rx_bd_base; - else - bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + bdp = fec_enet_get_nextdesc(bdp, fep); + /* Doing this 
here will keep the FEC running while we process * incoming frames. On a heavily loaded network, we should be * able to keep up at the expense of system resources. @@ -1662,7 +1686,7 @@ static void fec_enet_free_buffers(struct net_device *ndev) struct bufdesc *bdp; bdp = fep->rx_bd_base; - for (i = 0; i < RX_RING_SIZE; i++) { + for (i = 0; i < fep->rx_ring_size; i++) { skb = fep->rx_skbuff[i]; if (bdp->cbd_bufaddr) @@ -1670,11 +1694,11 @@ static void fec_enet_free_buffers(struct net_device *ndev) FEC_ENET_RX_FRSIZE, DMA_FROM_DEVICE); if (skb) dev_kfree_skb(skb); - bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + bdp = fec_enet_get_nextdesc(bdp, fep); } bdp = fep->tx_bd_base; - for (i = 0; i < TX_RING_SIZE; i++) + for (i = 0; i < fep->tx_ring_size; i++) kfree(fep->tx_bounce[i]); } @@ -1686,7 +1710,7 @@ static int fec_enet_alloc_buffers(struct net_device *ndev) struct bufdesc *bdp; bdp = fep->rx_bd_base; - for (i = 0; i < RX_RING_SIZE; i++) { + for (i = 0; i < fep->rx_ring_size; i++) { skb = netdev_alloc_skb(ndev, FEC_ENET_RX_FRSIZE); if (!skb) { fec_enet_free_buffers(ndev); @@ -1703,15 +1727,15 @@ static int fec_enet_alloc_buffers(struct net_device *ndev) ebdp->cbd_esc = BD_ENET_RX_INT; } - bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + bdp = fec_enet_get_nextdesc(bdp, fep); } /* Set the last buffer to wrap. */ - bdp = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex); + bdp = fec_enet_get_prevdesc(bdp, fep); bdp->cbd_sc |= BD_SC_WRAP; bdp = fep->tx_bd_base; - for (i = 0; i < TX_RING_SIZE; i++) { + for (i = 0; i < fep->tx_ring_size; i++) { fep->tx_bounce[i] = kmalloc(FEC_ENET_TX_FRSIZE, GFP_KERNEL); bdp->cbd_sc = 0; @@ -1722,11 +1746,11 @@ static int fec_enet_alloc_buffers(struct net_device *ndev) ebdp->cbd_esc = BD_ENET_TX_INT; } - bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + bdp = fec_enet_get_nextdesc(bdp, fep); } /* Set the last buffer to wrap. */ - bdp = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex); + bdp = fec_enet_get_prevdesc(bdp, fep); bdp->cbd_sc |= BD_SC_WRAP; return 0; @@ -1966,13 +1990,17 @@ static int fec_enet_init(struct net_device *ndev) /* Get the Ethernet address */ fec_get_mac(ndev); + /* init the tx & rx ring size */ + fep->tx_ring_size = TX_RING_SIZE; + fep->rx_ring_size = RX_RING_SIZE; + /* Set receive and transmit descriptor base. */ fep->rx_bd_base = cbd_base; if (fep->bufdesc_ex) fep->tx_bd_base = (struct bufdesc *) - (((struct bufdesc_ex *)cbd_base) + RX_RING_SIZE); + (((struct bufdesc_ex *)cbd_base) + fep->rx_ring_size); else - fep->tx_bd_base = cbd_base + RX_RING_SIZE; + fep->tx_bd_base = cbd_base + fep->rx_ring_size; /* The FEC Ethernet specific entries in the device structure */ ndev->watchdog_timeo = TX_TIMEOUT; -- cgit v0.10.2 From 25a6e6b84fba601eff7c28d30da8ad7cfbef0d43 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 Sep 2013 13:37:01 +0200 Subject: ipv6: Don't depend on per socket memory for neighbour discovery messages Allocating skbs when sending out neighbour discovery messages currently uses sock_alloc_send_skb() based on a per net namespace socket and thus share a socket wmem buffer space. If a netdevice is temporarily unable to transmit due to carrier loss or for other reasons, the queued up ndisc messages will cosnume all of the wmem space and will thus prevent from any more skbs to be allocated even for netdevices that are able to transmit packets. 
The number of neighbour discovery messages sent is very limited, use of alloc_skb() bypasses the socket wmem buffer size enforcement while the manual call to skb_set_owner_w() maintains the socket reference needed for the IPv6 output path. This patch has orginally been posted by Eric Dumazet in a modified form. Signed-off-by: Thomas Graf Cc: Eric Dumazet Cc: Hannes Frederic Sowa Cc: Stephen Warren Cc: Fabio Estevam Tested-by: Fabio Estevam Tested-by: Stephen Warren Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 04d31c2..78141fa 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -372,14 +372,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, int tlen = dev->needed_tailroom; struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; - int err; - skb = sock_alloc_send_skb(sk, - hlen + sizeof(struct ipv6hdr) + len + tlen, - 1, &err); + skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); if (!skb) { - ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n", - __func__, err); + ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", + __func__); return NULL; } @@ -389,6 +386,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, skb_reserve(skb, hlen + sizeof(struct ipv6hdr)); skb_reset_transport_header(skb); + /* Manually assign socket ownership as we avoid calling + * sock_alloc_send_pskb() to bypass wmem buffer limits + */ + skb_set_owner_w(skb, sk); + return skb; } -- cgit v0.10.2 From 3a1c756590633c0e86df606e5c618c190926a0df Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 3 Sep 2013 19:29:12 +0200 Subject: net: ipv6: tcp: fix potential use after free in tcp_v6_do_rcv In tcp_v6_do_rcv() code, when processing pkt options, we soley work on our skb clone opt_skb that we've created earlier before entering tcp_rcv_established() on our way. However, only in condition ... if (np->rxopt.bits.rxtclass) np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); ... we work on skb itself. As we extract every other information out of opt_skb in ipv6_pktoptions path, this seems wrong, since skb can already be released by tcp_rcv_established() earlier on. When we try to access it in ipv6_hdr(), we will dereference freed skb. [ Bug added by commit 4c507d2897bd9b ("net: implement IP_RECVTOS for IP_PKTOPTIONS") ] Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Acked-by: Eric Dumazet Acked-by: Jiri Benc Signed-off-by: David S. Miller diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6e1649d..eeb4cb0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1427,7 +1427,7 @@ ipv6_pktoptions: if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; if (np->rxopt.bits.rxtclass) - np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); + np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); -- cgit v0.10.2 From d2779e99468fd83ef1493c34f3803fec9aad8345 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 4 Sep 2013 02:41:27 +0400 Subject: sh_eth: fix napi_{en|dis}able() calls racing against interrupts While implementing NAPI for the driver, I overlooked the race conditions where interrupt handler might have called napi_schedule_prep() before napi_enable() was called or after napi_disable() was called. 
If RX interrupt happens, this would cause the endless interrupts and messages like: sh-eth eth0: ignoring interrupt, status 0x00040000, mask 0x01ff009f. The interrupt wouldn't even be masked by the kernel eventually since the handler would return IRQ_HANDLED all the time. As a fix, move napi_enable() call before request_irq() call and napi_disable() call after free_irq() call. Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 1ccd595..3426d32 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1857,11 +1857,13 @@ static int sh_eth_open(struct net_device *ndev) pm_runtime_get_sync(&mdp->pdev->dev); + napi_enable(&mdp->napi); + ret = request_irq(ndev->irq, sh_eth_interrupt, mdp->cd->irq_flags, ndev->name, ndev); if (ret) { dev_err(&ndev->dev, "Can not assign IRQ number\n"); - return ret; + goto out_napi_off; } /* Descriptor set */ @@ -1879,12 +1881,12 @@ static int sh_eth_open(struct net_device *ndev) if (ret) goto out_free_irq; - napi_enable(&mdp->napi); - return ret; out_free_irq: free_irq(ndev->irq, ndev); +out_napi_off: + napi_disable(&mdp->napi); pm_runtime_put_sync(&mdp->pdev->dev); return ret; } @@ -1976,8 +1978,6 @@ static int sh_eth_close(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); - napi_disable(&mdp->napi); - netif_stop_queue(ndev); /* Disable interrupts by clearing the interrupt mask. */ @@ -1995,6 +1995,8 @@ static int sh_eth_close(struct net_device *ndev) free_irq(ndev->irq, ndev); + napi_disable(&mdp->napi); + /* Free all the skbuffs in the Rx queue. */ sh_eth_ring_free(ndev); -- cgit v0.10.2 From 714086029116b6b0a34e67ba1dd2f0d1cf26770c Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 4 Sep 2013 16:21:18 +0200 Subject: net: mvneta: properly disable HW PHY polling and ensure adjust_link() works MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit fixes a long-standing bug that has been reported by many users: on some Armada 370 platforms, only the network interface that has been used in U-Boot to tftp the kernel works properly in Linux. The other network interfaces can see a 'link up', but are unable to transmit data. The reports were generally made on the Armada 370-based Mirabox, but have also been given on the Armada 370-RD board. The network MAC in the Armada 370/XP (supported by the mvneta driver in Linux) has a functionality that allows it to continuously poll the PHY and directly update the MAC configuration accordingly (speed, duplex, etc.). The very first versions of the driver submitted for review were using this hardware mechanism, but due to this, the driver was not integrated with the kernel phylib. Following reviews, the driver was changed to use the phylib, and therefore a software based polling. In software based polling, Linux regularly talks to the PHY over the MDIO bus, and sees if the link status has changed. If it's the case then the adjust_link() callback of the driver is called to update the MAC configuration accordingly. However, it turns out that the adjust_link() callback was not configuring the hardware in a completely correct way: while it was setting the speed and duplex bits correctly, it wasn't telling the hardware to actually take into account those bits rather than what the hardware-based PHY polling mechanism has concluded. So, in fact the adjust_link() callback was basically a no-op. 
However, the network happened to be working because on the network interfaces used by U-Boot for tftp on Armada 370 platforms because the hardware PHY polling was enabled by the bootloader, and left enabled by Linux. However, the second network interface not used for tftp (or both network interfaces if the kernel is loaded from USB, NAND or SD card) didn't had the hardware PHY polling enabled. This patch fixes this situation by: (1) Making sure that the hardware PHY polling is disabled by clearing the MVNETA_PHY_POLLING_ENABLE bit in the MVNETA_UNIT_CONTROL register in the driver ->probe() function. (2) Making sure that the duplex and speed selections made by the adjust_link() callback are taken into account by clearing the MVNETA_GMAC_AN_SPEED_EN and MVNETA_GMAC_AN_DUPLEX_EN bits in the MVNETA_GMAC_AUTONEG_CONFIG register. This patch has been tested on Armada 370 Mirabox, and now both network interfaces are usable after boot. [ Problem introduced by commit c5aff18 ("net: mvneta: driver for Marvell Armada 370/XP network unit") ] Signed-off-by: Thomas Petazzoni Cc: Willy Tarreau Cc: Jochen De Smet Cc: Peter Sanford Cc: Ethan Tuttle Cc: Chény Yves-Gael Cc: Ryan Press Cc: Simon Guinot Cc: vdonnefort@lacie.com Cc: stable@vger.kernel.org Acked-by: Jason Cooper Tested-by: Vincent Donnefort Tested-by: Yves-Gael Cheny Tested-by: Gregory CLEMENT Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index b017818..90ab292 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -138,7 +138,9 @@ #define MVNETA_GMAC_FORCE_LINK_PASS BIT(1) #define MVNETA_GMAC_CONFIG_MII_SPEED BIT(5) #define MVNETA_GMAC_CONFIG_GMII_SPEED BIT(6) +#define MVNETA_GMAC_AN_SPEED_EN BIT(7) #define MVNETA_GMAC_CONFIG_FULL_DUPLEX BIT(12) +#define MVNETA_GMAC_AN_DUPLEX_EN BIT(13) #define MVNETA_MIB_COUNTERS_BASE 0x3080 #define MVNETA_MIB_LATE_COLLISION 0x7c #define MVNETA_DA_FILT_SPEC_MCAST 0x3400 @@ -915,6 +917,13 @@ static void mvneta_defaults_set(struct mvneta_port *pp) /* Assign port SDMA configuration */ mvreg_write(pp, MVNETA_SDMA_CONFIG, val); + /* Disable PHY polling in hardware, since we're using the + * kernel phylib to do this. + */ + val = mvreg_read(pp, MVNETA_UNIT_CONTROL); + val &= ~MVNETA_PHY_POLLING_ENABLE; + mvreg_write(pp, MVNETA_UNIT_CONTROL, val); + mvneta_set_ucast_table(pp, -1); mvneta_set_special_mcast_table(pp, -1); mvneta_set_other_mcast_table(pp, -1); @@ -2307,7 +2316,9 @@ static void mvneta_adjust_link(struct net_device *ndev) val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED | MVNETA_GMAC_CONFIG_GMII_SPEED | - MVNETA_GMAC_CONFIG_FULL_DUPLEX); + MVNETA_GMAC_CONFIG_FULL_DUPLEX | + MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_DUPLEX_EN); if (phydev->duplex) val |= MVNETA_GMAC_CONFIG_FULL_DUPLEX; -- cgit v0.10.2 From 15f594563d85825ec242df6d37e64e7dfe823df8 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 4 Sep 2013 16:26:52 +0200 Subject: net: mvneta: implement ->ndo_do_ioctl() to support PHY ioctls This commit implements the ->ndo_do_ioctl() operation so that the PHY-related ioctl() calls can work from userspace, which allows applications like mii-tool or mii-diag to do their job. Signed-off-by: Thomas Petazzoni Tested-by: Gregory CLEMENT Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 90ab292..389a854 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2451,6 +2451,21 @@ static int mvneta_stop(struct net_device *dev) return 0; } +static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mvneta_port *pp = netdev_priv(dev); + int ret; + + if (!pp->phy_dev) + return -ENOTSUPP; + + ret = phy_mii_ioctl(pp->phy_dev, ifr, cmd); + if (!ret) + mvneta_adjust_link(dev); + + return ret; +} + /* Ethtool methods */ /* Get settings (phy address, speed) for ethtools */ @@ -2569,6 +2584,7 @@ static const struct net_device_ops mvneta_netdev_ops = { .ndo_change_mtu = mvneta_change_mtu, .ndo_tx_timeout = mvneta_tx_timeout, .ndo_get_stats64 = mvneta_get_stats64, + .ndo_do_ioctl = mvneta_ioctl, }; const struct ethtool_ops mvneta_eth_tool_ops = { -- cgit v0.10.2 From f011baf95e9d3e06e63132c9f8f7307def2e6753 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Wed, 4 Sep 2013 14:12:21 -0700 Subject: vxlan: Fix kernel panic on device delete. On vxlan device create if socket create fails vxlan device is not added to hash table. Therefore we need to check if device is in hashtable before we delete it from hlist. Following patch avoid the crash. net-next already has this fix. ---8<--- BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] vxlan_dellink+0x77/0xf0 [vxlan] PGD 42b2d9067 PUD 42e04c067 PMD 0 Oops: 0002 [#1] SMP Modules linked in: vxlan(-) Hardware name: Dell Inc. PowerEdge R620/0KCKR5, BIOS 1.4.8 10/25/2012 task: ffff88042ecf8760 ti: ffff88042f106000 task.ti: ffff88042f106000 RIP: 0010:[] [] vxlan_dellink+0x77/0xf0 [vxlan] RSP: 0018:ffff88042f107e28 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88082af08000 RCX: ffff88083fd80000 RDX: 0000000000000000 RSI: ffff88042f107e58 RDI: ffff88042e12f810 RBP: ffff88042f107e48 R08: ffffffff8166eca0 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffff88082af087c0 R13: ffff88042e12f000 R14: ffff88042f107e58 R15: ffff88042f107e58 FS: 00007f4ed2de7700(0000) GS:ffff88043fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000042e076000 CR4: 00000000000407e0 Stack: ffff88082af08000 ffffffff81654848 ffffffffa05fb4e0 ffffffff81654780 ffff88042f107e98 ffffffff813b9c7a ffff88042f107e58 ffff88042f107e58 ffff88042f107e88 ffffffffa05fb4e0 ffffffffa05fb780 ffff88042f107f18 Call Trace: [] __rtnl_link_unregister+0xca/0xd0 [] rtnl_link_unregister+0x19/0x30 [] vxlan_cleanup_module+0x10/0x2f [vxlan] [] SyS_delete_module+0x1cf/0x2c0 [] ? do_page_fault+0x9/0x10 [] system_call_fastpath+0x16/0x1b Code: 4d 85 ed 0f 84 95 00 00 00 4c 8d a7 c0 07 00 00 49 8d bd 10 08 00 00 e8 28 e8 e6 e0 48 8b 83 c0 07 00 00 49 8b 54 24 08 48 85 c0 <48> 89 02 74 04 48 89 50 08 49 b8 00 02 20 00 00 00 ad de 4d 89 RIP [] vxlan_dellink+0x77/0xf0 [vxlan] RSP CR2: 0000000000000000 Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 767f7af..2c15f6c 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1794,7 +1794,8 @@ static void vxlan_dellink(struct net_device *dev, struct list_head *head) struct vxlan_dev *vxlan = netdev_priv(dev); spin_lock(&vn->sock_lock); - hlist_del_rcu(&vxlan->hlist); + if (!hlist_unhashed(&vxlan->hlist)) + hlist_del_rcu(&vxlan->hlist); spin_unlock(&vn->sock_lock); list_del(&vxlan->next); -- cgit v0.10.2 From c0a77ec74f295013d7ba3204dd3ed25fccf83cb4 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 4 Sep 2013 23:46:58 -0400 Subject: bnx2x: Add missing braces in bnx2x:bnx2x_link_initialize The indentation here implies that the intent was for this to be a multiline if. Introduced a few years ago in commit ec146a6f019923819f5ca381980248b6d154ca1a ("bnx2x: Modify XGXS functions") Signed-off-by: Dave Jones Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 9d64b98..6645684 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -6501,12 +6501,13 @@ static int bnx2x_link_initialize(struct link_params *params, struct bnx2x_phy *phy = ¶ms->phy[INT_PHY]; if (vars->line_speed == SPEED_AUTO_NEG && (CHIP_IS_E1x(bp) || - CHIP_IS_E2(bp))) + CHIP_IS_E2(bp))) { bnx2x_set_parallel_detection(phy, params); if (params->phy[INT_PHY].config_init) params->phy[INT_PHY].config_init(phy, params, vars); + } } /* Init external phy*/ -- cgit v0.10.2 From 0c1db731bfcf3a9fd6c58132134f8b0f423552f0 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 5 Sep 2013 00:11:19 -0400 Subject: caif: Add missing braces to multiline if in cfctrl_linkup_request The indentation here implies this was meant to be a multi-line if. Introduced several years back in commit c85c2951d4da1236e32f1858db418221e624aba5 ("caif: Handle dev_queue_xmit errors.") Signed-off-by: Dave Jones Signed-off-by: David S. Miller diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index 2bd4b58..0f45522 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -293,9 +293,10 @@ int cfctrl_linkup_request(struct cflayer *layer, count = cfctrl_cancel_req(&cfctrl->serv.layer, user_layer); - if (count != 1) + if (count != 1) { pr_err("Could not remove request (%d)", count); return -ENODEV; + } } return 0; } -- cgit v0.10.2 From e2e5c4c07caf810d7849658dca42f598b3938e21 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 5 Sep 2013 13:43:34 -0400 Subject: tcp: Add missing braces to do_tcp_setsockopt Signed-off-by: Dave Jones Acked-by: Neal Cardwell Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b2f6c74..95544e4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2454,10 +2454,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_THIN_DUPACK: if (val < 0 || val > 1) err = -EINVAL; - else + else { tp->thin_dupack = val; if (tp->thin_dupack) tcp_disable_early_retrans(tp); + } break; case TCP_REPAIR: -- cgit v0.10.2
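The three brace fixes above (bnx2x, caif, tcp) are all instances of the same C pitfall: the indentation suggests a multi-statement branch, but without braces only the first statement is part of the if/else. A minimal standalone illustration, using generic local variables that merely stand in for the kernel fields touched by the tcp patch:

#include <stdio.h>

int main(void)
{
	int val = 5;			/* deliberately out of range */
	int err = 0, thin_dupack = 1;

	if (val < 0 || val > 1)
		err = -1;
	else
		thin_dupack = val;
		/* Indentation suggests this belongs to the else branch,
		 * but without braces it does not: it runs unconditionally. */
		if (thin_dupack)
			printf("reached even though val=%d was rejected (err=%d)\n",
			       val, err);

	return 0;
}

The program prints the message even though val was rejected, mirroring how tcp_disable_early_retrans() could be reached for an invalid setsockopt value before the braces were added.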