From a505b3b30fc69904f858822a2aa95990a4bf7958 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 12 Sep 2010 11:56:44 -0700
Subject: sch_atm: Fix potential NULL deref.

The list_head conversion unearther an unnecessary flow
check.  Since flow is always NULL here we don't need to
see if a matching flow exists already.

Reported-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 34066278..6318e11 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -255,10 +255,6 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 			error = -EINVAL;
 			goto err_out;
 		}
-		if (!list_empty(&flow->list)) {
-			error = -EEXIST;
-			goto err_out;
-		}
 	} else {
 		int i;
 		unsigned long cl;
-- 
cgit v0.10.2


From 339db11b219f36cf7da61b390992d95bb6b7ba2e Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Fri, 10 Sep 2010 01:56:16 +0000
Subject: net/llc: make opt unsigned in llc_ui_setsockopt()

The members of struct llc_sock are unsigned so if we pass a negative
value for "opt" it can cause a sign bug.  Also it can cause an integer
overflow when we multiply "opt * HZ".

CC: stable@kernel.org
Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 023ba82..5826129 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -1024,7 +1024,8 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname,
 {
 	struct sock *sk = sock->sk;
 	struct llc_sock *llc = llc_sk(sk);
-	int rc = -EINVAL, opt;
+	unsigned int opt;
+	int rc = -EINVAL;
 
 	lock_sock(sk);
 	if (unlikely(level != SOL_LLC || optlen != sizeof(int)))
-- 
cgit v0.10.2


From 3429769bc67c7a48b3c01b2452b32171b3450202 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Fri, 10 Sep 2010 01:58:10 +0000
Subject: ppp: potential NULL dereference in ppp_mp_explode()

Smatch complains because we check whether "pch->chan" is NULL and then
dereference it unconditionally on the next line.  Partly the reason this
bug was introduced is because code was too complicated.  I've simplified
it a little.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 6695a51..736b917 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1314,8 +1314,13 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 	hdrlen = (ppp->flags & SC_MP_XSHORTSEQ)? MPHDRLEN_SSN: MPHDRLEN;
 	i = 0;
 	list_for_each_entry(pch, &ppp->channels, clist) {
-		navail += pch->avail = (pch->chan != NULL);
-		pch->speed = pch->chan->speed;
+		if (pch->chan) {
+			pch->avail = 1;
+			navail++;
+			pch->speed = pch->chan->speed;
+		} else {
+			pch->avail = 0;
+		}
 		if (pch->avail) {
 			if (skb_queue_empty(&pch->file.xq) ||
 				!pch->had_frag) {
-- 
cgit v0.10.2


From 7998156344b0d93de61ff8e5d75e96500e43a571 Mon Sep 17 00:00:00 2001
From: Bob Arendt <rda@rincon.com>
Date: Mon, 13 Sep 2010 12:56:03 -0700
Subject: ipv4: force_igmp_version ignored when a IGMPv3 query received

After all these years, it turns out that the
    /proc/sys/net/ipv4/conf/*/force_igmp_version
parameter isn't fully implemented.

*Symptom*:
When set force_igmp_version to a value of 2, the kernel should only perform
multicast IGMPv2 operations (IETF rfc2236).  An host-initiated Join message
will be sent as a IGMPv2 Join message.  But if a IGMPv3 query message is
received, the host responds with a IGMPv3 join message.  Per rfc3376 and
rfc2236, a IGMPv2 host should treat a IGMPv3 query as a IGMPv2 query and
respond with an IGMPv2 Join message.

*Consequences*:
This is an issue when a IGMPv3 capable switch is the querier and will only
issue IGMPv3 queries (which double as IGMPv2 querys) and there's an
intermediate switch that is only IGMPv2 capable.  The intermediate switch
processes the initial v2 Join, but fails to recognize the IGMPv3 Join responses
to the Query, resulting in a dropped connection when the intermediate v2-only
switch times it out.

*Identifying issue in the kernel source*:
The issue is in this section of code (in net/ipv4/igmp.c), which is called when
an IGMP query is received  (from mainline 2.6.36-rc3 gitweb):
 ...
A IGMPv3 query has a length >= 12 and no sources.  This routine will exit after
line 880, setting the general query timer (random timeout between 0 and query
response time).  This calls igmp_gq_timer_expire():
...
.. which only sends a v3 response.  So if a v3 query is received, the kernel
always sends a v3 response.

IGMP queries happen once every 60 sec (per vlan), so the traffic is low.  A
IGMPv3 query *is* a strict superset of a IGMPv2 query, so this patch properly
short circuit's the v3 behaviour.

One issue is that this does not address force_igmp_version=1.  Then again, I've
never seen any IGMPv1 multicast equipment in the wild.  However there is a lot
of v2-only equipment. If it's necessary to support the IGMPv1 case as well:

837         if (len == 8 || IGMP_V2_SEEN(in_dev) || IGMP_V1_SEEN(in_dev)) {

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index a1ad0e7..1fdcacd 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -834,7 +834,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 	int			mark = 0;
 
 
-	if (len == 8) {
+	if (len == 8 || IGMP_V2_SEEN(in_dev)) {
 		if (ih->code == 0) {
 			/* Alas, old v1 router presents here. */
 
-- 
cgit v0.10.2


From a89b47639f3e11dd9a8eb78a5d3382e109c876f2 Mon Sep 17 00:00:00 2001
From: Michael Kerrisk <mtk.manpages@gmail.com>
Date: Fri, 10 Sep 2010 20:26:56 +0000
Subject: ipv4: enable getsockopt() for IP_NODEFRAG

While integrating your man-pages patch for IP_NODEFRAG, I noticed
that this option is settable by setsockopt(), but not gettable by
getsockopt(). I suppose this is not intended. The (untested,
trivial) patch below adds getsockopt() support.

Signed-off-by: Michael kerrisk <mtk.manpages@gmail.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 6c40a8c..64b70ad 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1129,6 +1129,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 	case IP_HDRINCL:
 		val = inet->hdrincl;
 		break;
+	case IP_NODEFRAG:
+		val = inet->nodefrag;
+		break;
 	case IP_MTU_DISCOVER:
 		val = inet->pmtudisc;
 		break;
-- 
cgit v0.10.2


From ee05d6939ed17b55e9c2466af32c208e0d547eb8 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 14 Sep 2010 15:15:52 +0200
Subject: vhost-net: fix range checking in mrg bufs case

In mergeable buffer case, we use headcount, log_num
and seg as indexes in same-size arrays, and
we know that headcount <= seg and
log_num equals either 0 or seg.

Therefore, the right thing to do is range-check seg,
not headcount as we do now: these will be different
if guest chains s/g descriptors (this does not
happen now, but we can not trust the guest).

Long term, we should add BUG_ON checks to verify
two other indexes are what we think they should be.

Reported-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 29e850a..7c80082 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -243,7 +243,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 	int r, nlogs = 0;
 
 	while (datalen > 0) {
-		if (unlikely(headcount >= VHOST_NET_MAX_SG)) {
+		if (unlikely(seg >= VHOST_NET_MAX_SG)) {
 			r = -ENOBUFS;
 			goto err;
 		}
-- 
cgit v0.10.2


From ab12811c89e88f2e66746790b1fe4469ccb7bdd9 Mon Sep 17 00:00:00 2001
From: Andy Gospodarek <andy@greyhouse.net>
Date: Fri, 10 Sep 2010 11:43:20 +0000
Subject: bonding: correctly process non-linear skbs

It was recently brought to my attention that 802.3ad mode bonds would no
longer form when using some network hardware after a driver update.
After snooping around I realized that the particular hardware was using
page-based skbs and found that skb->data did not contain a valid LACPDU
as it was not stored there.  That explained the inability to form an
802.3ad-based bond.  For balance-alb mode bonds this was also an issue
as ARPs would not be properly processed.

This patch fixes the issue in my tests and should be applied to 2.6.36
and as far back as anyone cares to add it to stable.

Thanks to Alexander Duyck <alexander.h.duyck@intel.com> and Jesse
Brandeburg <jesse.brandeburg@intel.com> for the suggestions on this one.

Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
CC: Jesse Brandeburg <jesse.brandeburg@intel.com>
CC: stable@kerne.org
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 822f586..0ddf4c6 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2466,6 +2466,9 @@ int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct pac
 	if (!(dev->flags & IFF_MASTER))
 		goto out;
 
+	if (!pskb_may_pull(skb, sizeof(struct lacpdu)))
+		goto out;
+
 	read_lock(&bond->lock);
 	slave = bond_get_slave_by_dev((struct bonding *)netdev_priv(dev),
 					orig_dev);
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index c746b33..26bb118 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -362,6 +362,9 @@ static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct
 		goto out;
 	}
 
+	if (!pskb_may_pull(skb, arp_hdr_len(bond_dev)))
+		goto out;
+
 	if (skb->len < sizeof(struct arp_pkt)) {
 		pr_debug("Packet is too small to be an ARP\n");
 		goto out;
-- 
cgit v0.10.2


From ef885afbf8a37689afc1d9d545e2f3e7a8276c17 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 13 Sep 2010 12:24:54 +0000
Subject: net: use rcu_barrier() in rollback_registered_many

netdev_wait_allrefs() waits that all references to a device vanishes.

It currently uses a _very_ pessimistic 250 ms delay between each probe.
Some users reported that no more than 4 devices can be dismantled per
second, this is a pretty serious problem for some setups.

Most of the time, a refcount is about to be released by an RCU callback,
that is still in flight because rollback_registered_many() uses a
synchronize_rcu() call instead of rcu_barrier(). Problem is visible if
number of online cpus is one, because synchronize_rcu() is then a no op.

time to remove 50 ipip tunnels on a UP machine :

before patch : real 11.910s
after patch : real 1.250s

Reported-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reported-by: Octavian Purdila <opurdila@ixiacom.com>
Reported-by: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/dev.c b/net/core/dev.c
index b9b22a3..660dd41 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4845,7 +4845,7 @@ static void rollback_registered_many(struct list_head *head)
 	dev = list_first_entry(head, struct net_device, unreg_list);
 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
-	synchronize_net();
+	rcu_barrier();
 
 	list_for_each_entry(dev, head, unreg_list)
 		dev_put(dev);
-- 
cgit v0.10.2


From fddd91016d16277a32727ad272cf2edd3d309c90 Mon Sep 17 00:00:00 2001
From: Simon Guinot <sguinot@lacie.com>
Date: Mon, 13 Sep 2010 22:12:01 +0000
Subject: phylib: fix PAL state machine restart on resume

On resume, before starting the PAL state machine, check if the
adjust_link() method is well supplied. If not, this would lead to a
NULL pointer dereference in the phy_state_machine() function.

This scenario can happen if the Ethernet driver call manually the PHY
functions instead of using the PAL state machine. The mv643xx_eth driver
is a such example.

Signed-off-by: Simon Guinot <sguinot@lacie.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 6a6b819..6c58da2 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -308,7 +308,7 @@ static int mdio_bus_suspend(struct device *dev)
 	 * may call phy routines that try to grab the same lock, and that may
 	 * lead to a deadlock.
 	 */
-	if (phydev->attached_dev)
+	if (phydev->attached_dev && phydev->adjust_link)
 		phy_stop_machine(phydev);
 
 	if (!mdio_bus_phy_may_suspend(phydev))
@@ -331,7 +331,7 @@ static int mdio_bus_resume(struct device *dev)
 		return ret;
 
 no_resume:
-	if (phydev->attached_dev)
+	if (phydev->attached_dev && phydev->adjust_link)
 		phy_start_machine(phydev, NULL);
 
 	return 0;
-- 
cgit v0.10.2


From 6dcbc12290abb452a5e42713faa6461b248e2f55 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 14 Sep 2010 21:41:20 -0700
Subject: net: RPS needs to depend upon USE_GENERIC_SMP_HELPERS

You cannot invoke __smp_call_function_single() unless the
architecture sets this symbol.

Reported-by: Daniel Hellstrom <daniel@gaisler.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/Kconfig b/net/Kconfig
index e330594..e926884 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -217,7 +217,7 @@ source "net/dns_resolver/Kconfig"
 
 config RPS
 	boolean
-	depends on SMP && SYSFS
+	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
 	default y
 
 menu "Network testing"
-- 
cgit v0.10.2


From 01f83d69844d307be2aa6fea88b0e8fe5cbdb2f4 Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Date: Wed, 15 Sep 2010 10:27:52 -0700
Subject: tcp: Prevent overzealous packetization by SWS logic.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If peer uses tiny MSS (say, 75 bytes) and similarly tiny advertised
window, the SWS logic will packetize to half the MSS unnecessarily.

This causes problems with some embedded devices.

However for large MSS devices we do want to half-MSS packetize
otherwise we never get enough packets into the pipe for things
like fast retransmit and recovery to work.

Be careful also to handle the case where MSS > window, otherwise
we'll never send until the probe timer.

Reported-by: ツ Leandro Melo de Sales <leandroal@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/tcp.h b/include/net/tcp.h
index eaa9582..3e4b33e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -475,8 +475,22 @@ extern unsigned int tcp_current_mss(struct sock *sk);
 /* Bound MSS / TSO packet size with the half of the window */
 static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
 {
-	if (tp->max_window && pktsize > (tp->max_window >> 1))
-		return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
+	int cutoff;
+
+	/* When peer uses tiny windows, there is no use in packetizing
+	 * to sub-MSS pieces for the sake of SWS or making sure there
+	 * are enough packets in the pipe for fast recovery.
+	 *
+	 * On the other hand, for extremely large MSS devices, handling
+	 * smaller than MSS windows in this way does make sense.
+	 */
+	if (tp->max_window >= 512)
+		cutoff = (tp->max_window >> 1);
+	else
+		cutoff = tp->max_window;
+
+	if (cutoff && pktsize > cutoff)
+		return max_t(int, cutoff, 68U - tp->tcp_header_len);
 	else
 		return pktsize;
 }
-- 
cgit v0.10.2


From 84176b7b56704580e008e6cb820dd4ccf622a1fd Mon Sep 17 00:00:00 2001
From: Denis Kirjanov <dkirjanov@kernel.org>
Date: Wed, 15 Sep 2010 00:58:46 +0000
Subject: 3c59x: Remove atomic context inside vortex_{set|get}_wol

There is no need to use spinlocks in vortex_{set|get}_wol.
This also fixes a bug:
[  254.214993] 3c59x 0000:00:0d.0: PME# enabled
[  254.215021] BUG: sleeping function called from invalid context at kernel/mutex.c:94
[  254.215030] in_atomic(): 0, irqs_disabled(): 1, pid: 4875, name: ethtool
[  254.215042] Pid: 4875, comm: ethtool Tainted: G        W   2.6.36-rc3+ #7
[  254.215049] Call Trace:
[  254.215050]  [] __might_sleep+0xb1/0xb6
[  254.215050]  [] mutex_lock+0x17/0x30
[  254.215050]  [] acpi_enable_wakeup_device_power+0x2b/0xb1
[  254.215050]  [] acpi_pm_device_sleep_wake+0x42/0x7f
[  254.215050]  [] acpi_pci_sleep_wake+0x5d/0x63
[  254.215050]  [] platform_pci_sleep_wake+0x1d/0x20
[  254.215050]  [] __pci_enable_wake+0x90/0xd0
[  254.215050]  [] acpi_set_WOL+0x8e/0xf5 [3c59x]
[  254.215050]  [] vortex_set_wol+0x4e/0x5e [3c59x]
[  254.215050]  [] dev_ethtool+0x1cf/0xb61
[  254.215050]  [] ? debug_mutex_free_waiter+0x45/0x4a
[  254.215050]  [] ? __mutex_lock_common+0x204/0x20e
[  254.215050]  [] ? __mutex_lock_slowpath+0x12/0x15
[  254.215050]  [] ? mutex_lock+0x23/0x30
[  254.215050]  [] dev_ioctl+0x42c/0x533
[  254.215050]  [] ? _cond_resched+0x8/0x1c
[  254.215050]  [] ? lock_page+0x1c/0x30
[  254.215050]  [] ? page_address+0x15/0x7c
[  254.215050]  [] ? filemap_fault+0x187/0x2c4
[  254.215050]  [] sock_ioctl+0x1d4/0x1e0
[  254.215050]  [] ? sock_ioctl+0x0/0x1e0
[  254.215050]  [] vfs_ioctl+0x19/0x33
[  254.215050]  [] do_vfs_ioctl+0x424/0x46f
[  254.215050]  [] ? selinux_file_ioctl+0x3c/0x40
[  254.215050]  [] sys_ioctl+0x40/0x5a
[  254.215050]  [] sysenter_do_call+0x12/0x22

vortex_set_wol protected with a spinlock, but nested  acpi_set_WOL acquires a mutex inside atomic context.
Ethtool operations are already serialized by RTNL mutex, so it is safe to drop the locks.

Signed-off-by: Denis Kirjanov <dkirjanov@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index 85671ad..fa42103 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -635,6 +635,9 @@ struct vortex_private {
 		must_free_region:1,				/* Flag: if zero, Cardbus owns the I/O region */
 		large_frames:1,			/* accept large frames */
 		handling_irq:1;			/* private in_irq indicator */
+	/* {get|set}_wol operations are already serialized by rtnl.
+	 * no additional locking is required for the enable_wol and acpi_set_WOL()
+	 */
 	int drv_flags;
 	u16 status_enable;
 	u16 intr_enable;
@@ -2939,13 +2942,11 @@ static void vortex_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
 	struct vortex_private *vp = netdev_priv(dev);
 
-	spin_lock_irq(&vp->lock);
 	wol->supported = WAKE_MAGIC;
 
 	wol->wolopts = 0;
 	if (vp->enable_wol)
 		wol->wolopts |= WAKE_MAGIC;
-	spin_unlock_irq(&vp->lock);
 }
 
 static int vortex_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
@@ -2954,13 +2955,11 @@ static int vortex_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 	if (wol->wolopts & ~WAKE_MAGIC)
 		return -EINVAL;
 
-	spin_lock_irq(&vp->lock);
 	if (wol->wolopts & WAKE_MAGIC)
 		vp->enable_wol = 1;
 	else
 		vp->enable_wol = 0;
 	acpi_set_WOL(dev);
-	spin_unlock_irq(&vp->lock);
 
 	return 0;
 }
-- 
cgit v0.10.2


From 801e147cde02f04b5c2f42764cd43a89fc7400a2 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Tue, 14 Sep 2010 11:57:11 +0000
Subject: r8169: Handle rxfifo errors on 8168 chips

The Thinkpad X100e seems to have some odd behaviour when the display is
powered off - the onboard r8169 starts generating rxfifo overflow errors.
The root cause of this has not yet been identified and may well be a
hardware design bug on the platform, but r8169 should be more resiliant to
this. This patch enables the rxfifo interrupt on 8168 devices and removes
the MAC version check in the interrupt handler, and the machine no longer
crashes when under network load while the screen turns off.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Acked-by: Francois Romieu <romieu@fr.zoreil.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 078bbf4..a0da4a1 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -2934,7 +2934,7 @@ static const struct rtl_cfg_info {
 		.hw_start	= rtl_hw_start_8168,
 		.region		= 2,
 		.align		= 8,
-		.intr_event	= SYSErr | LinkChg | RxOverflow |
+		.intr_event	= SYSErr | RxFIFOOver | LinkChg | RxOverflow |
 				  TxErr | TxOK | RxOK | RxErr,
 		.napi_event	= TxErr | TxOK | RxOK | RxOverflow,
 		.features	= RTL_FEATURE_GMII | RTL_FEATURE_MSI,
@@ -4625,8 +4625,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 		}
 
 		/* Work around for rx fifo overflow */
-		if (unlikely(status & RxFIFOOver) &&
-		(tp->mac_version == RTL_GIGA_MAC_VER_11)) {
+		if (unlikely(status & RxFIFOOver)) {
 			netif_stop_queue(dev);
 			rtl8169_tx_timeout(dev);
 			break;
-- 
cgit v0.10.2


From e71895a1beff2014534c9660d9ae42e043f11555 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 16 Sep 2010 12:27:50 +0000
Subject: xfrm: dont assume rcu_read_lock in xfrm_output_one()

ip_local_out() is called with rcu_read_lock() held from ip_queue_xmit()
but not from other call sites.

Reported-and-bisected-by: Nick Bowler <nbowler@elliptictech.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index a3cca0a..64f2ae1 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -101,7 +101,7 @@ resume:
 			err = -EHOSTUNREACH;
 			goto error_nolock;
 		}
-		skb_dst_set_noref(skb, dst);
+		skb_dst_set(skb, dst_clone(dst));
 		x = dst->xfrm;
 	} while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL));
 
-- 
cgit v0.10.2


From 7011e660938fc44ed86319c18a5954e95a82ab3e Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Wed, 15 Sep 2010 11:43:28 +0000
Subject: drivers/net/usb/hso.c: prevent reading uninitialized memory

Fixed formatting (tabs and line breaks).

The TIOCGICOUNT device ioctl allows unprivileged users to read
uninitialized stack memory, because the "reserved" member of the
serial_icounter_struct struct declared on the stack in hso_get_count()
is not altered or zeroed before being copied back to the user.  This
patch takes care of it.

Signed-off-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 6efca66..1cd752f 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -1652,6 +1652,8 @@ static int hso_get_count(struct hso_serial *serial,
 	struct uart_icount cnow;
 	struct hso_tiocmget  *tiocmget = serial->tiocmget;
 
+	memset(&icount, 0, sizeof(struct serial_icounter_struct));
+
 	if (!tiocmget)
 		 return -ENOENT;
 	spin_lock_irq(&serial->serial_lock);
-- 
cgit v0.10.2


From 44467187dc22fdd33a1a06ea0ba86ce20be3fe3c Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Wed, 15 Sep 2010 11:43:04 +0000
Subject: drivers/net/eql.c: prevent reading uninitialized stack memory

Fixed formatting (tabs and line breaks).

The EQL_GETMASTRCFG device ioctl allows unprivileged users to read 16
bytes of uninitialized stack memory, because the "master_name" member of
the master_config_t struct declared on the stack in eql_g_master_cfg()
is not altered or zeroed before being copied back to the user.  This
patch takes care of it.

Signed-off-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/eql.c b/drivers/net/eql.c
index dda2c79..0cb1cf9 100644
--- a/drivers/net/eql.c
+++ b/drivers/net/eql.c
@@ -555,6 +555,8 @@ static int eql_g_master_cfg(struct net_device *dev, master_config_t __user *mcp)
 	equalizer_t *eql;
 	master_config_t mc;
 
+	memset(&mc, 0, sizeof(master_config_t));
+
 	if (eql_is_master(dev)) {
 		eql = netdev_priv(dev);
 		mc.max_slaves = eql->max_slaves;
-- 
cgit v0.10.2


From 49c37c0334a9b85d30ab3d6b5d1acb05ef2ef6de Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Wed, 15 Sep 2010 11:43:12 +0000
Subject: drivers/net/cxgb3/cxgb3_main.c: prevent reading uninitialized stack
 memory

Fixed formatting (tabs and line breaks).

The CHELSIO_GET_QSET_NUM device ioctl allows unprivileged users to read
4 bytes of uninitialized stack memory, because the "addr" member of the
ch_reg struct declared on the stack in cxgb_extension_ioctl() is not
altered or zeroed before being copied back to the user.  This patch
takes care of it.

Signed-off-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index ad19585..f208712 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -2296,6 +2296,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
 	case CHELSIO_GET_QSET_NUM:{
 		struct ch_reg edata;
 
+		memset(&edata, 0, sizeof(struct ch_reg));
+
 		edata.cmd = CHELSIO_GET_QSET_NUM;
 		edata.val = pi->nqsets;
 		if (copy_to_user(useraddr, &edata, sizeof(edata)))
-- 
cgit v0.10.2


From e443e383242839b0d72efce744b7ba31573b1f99 Mon Sep 17 00:00:00 2001
From: Chris Snook <chris.snook@gmail.com>
Date: Thu, 16 Sep 2010 22:00:28 -0700
Subject: MAINTAINERS: move atlx discussions to netdev

The atlx drivers are sufficiently mature that we no longer need a separate
mailing list for them.  Move the discussion to netdev, so we can decommission
atl1-devel, which is now mostly spam.

Signed-off-by: Chris Snook <chris.snook@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/MAINTAINERS b/MAINTAINERS
index b5fb9bd..44e6595 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1135,7 +1135,7 @@ ATLX ETHERNET DRIVERS
 M:	Jay Cliburn <jcliburn@gmail.com>
 M:	Chris Snook <chris.snook@gmail.com>
 M:	Jie Yang <jie.yang@atheros.com>
-L:	atl1-devel@lists.sourceforge.net
+L:	netdev@vger.kernel.org
 W:	http://sourceforge.net/projects/atl1
 W:	http://atl1.sourceforge.net
 S:	Maintained
-- 
cgit v0.10.2


From 2507136f74f70a4869bd4f525d48715ae66db43d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Thu, 16 Sep 2010 08:12:55 +0000
Subject: net/llc: storing negative error codes in unsigned short

If the alloc_skb() fails then we return 65431 instead of -ENOBUFS
(-105).

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index e4dae02..cf4aea3 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -689,7 +689,7 @@ static void llc_station_rcv(struct sk_buff *skb)
 
 int __init llc_station_init(void)
 {
-	u16 rc = -ENOBUFS;
+	int rc = -ENOBUFS;
 	struct sk_buff *skb;
 	struct llc_station_state_ev *ev;
 
-- 
cgit v0.10.2


From 7acc7c683a747689aaaaad4fce1683fc3f85e552 Mon Sep 17 00:00:00 2001
From: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Date: Wed, 8 Sep 2010 08:30:20 -0700
Subject: iwlwifi: do not perferm force reset while doing scan

When uCode error condition detected, driver try to perform either
rf reset or firmware reload in order bring device back to
working condition.

If rf reset is required and scan is in process, there is no need
to issue rf reset since scan already reset the rf.

If firmware reload is required and scan is in process, skip the
reload request. There is a possibility firmware reload during
scan cause problem.

[  485.804046] WARNING: at net/mac80211/main.c:310 ieee80211_restart_hw+0x28/0x62()
[  485.804049] Hardware name: Latitude E6400
[  485.804052] ieee80211_restart_hw called with hardware scan in progress
[  485.804054] Modules linked in: iwlagn iwlcore bnep sco rfcomm l2cap crc16 bluetooth [last unloaded: iwlcore]
[  485.804069] Pid: 812, comm: kworker/u:3 Tainted: G        W   2.6.36-rc3-wl+ #74
[  485.804072] Call Trace:
[  485.804079]  [<c103019a>] warn_slowpath_common+0x60/0x75
[  485.804084]  [<c1030213>] warn_slowpath_fmt+0x26/0x2a
[  485.804089]  [<c145da67>] ieee80211_restart_hw+0x28/0x62
[  485.804102]  [<f8b35dc6>] iwl_bg_restart+0x113/0x150 [iwlagn]
[  485.804108]  [<c10415d5>] process_one_work+0x181/0x25c
[  485.804119]  [<f8b35cb3>] ? iwl_bg_restart+0x0/0x150 [iwlagn]
[  485.804124]  [<c104190a>] worker_thread+0xf9/0x1f2
[  485.804128]  [<c1041811>] ? worker_thread+0x0/0x1f2
[  485.804133]  [<c10451b0>] kthread+0x64/0x69
[  485.804137]  [<c104514c>] ? kthread+0x0/0x69
[  485.804141]  [<c1002df6>] kernel_thread_helper+0x6/0x10
[  485.804145] ---[ end trace 3d4ebdc02d524bbb ]---
[  485.804148] WG> 1
[  485.804153] Pid: 812, comm: kworker/u:3 Tainted: G        W   2.6.36-rc3-wl+ #74
[  485.804156] Call Trace:
[  485.804161]  [<c145da9b>] ? ieee80211_restart_hw+0x5c/0x62
[  485.804172]  [<f8b35dcb>] iwl_bg_restart+0x118/0x150 [iwlagn]
[  485.804177]  [<c10415d5>] process_one_work+0x181/0x25c
[  485.804188]  [<f8b35cb3>] ? iwl_bg_restart+0x0/0x150 [iwlagn]
[  485.804192]  [<c104190a>] worker_thread+0xf9/0x1f2
[  485.804197]  [<c1041811>] ? worker_thread+0x0/0x1f2
[  485.804201]  [<c10451b0>] kthread+0x64/0x69
[  485.804205]  [<c104514c>] ? kthread+0x0/0x69
[  485.804209]  [<c1002df6>] kernel_thread_helper+0x6/0x10

Signed-off-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>

diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index 07dbc27..e23c406 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -2613,6 +2613,11 @@ int iwl_force_reset(struct iwl_priv *priv, int mode, bool external)
 	if (test_bit(STATUS_EXIT_PENDING, &priv->status))
 		return -EINVAL;
 
+	if (test_bit(STATUS_SCANNING, &priv->status)) {
+		IWL_DEBUG_INFO(priv, "scan in progress.\n");
+		return -EINVAL;
+	}
+
 	if (mode >= IWL_MAX_FORCE_RESET) {
 		IWL_DEBUG_INFO(priv, "invalid reset request.\n");
 		return -EINVAL;
-- 
cgit v0.10.2


From 4bdab43323b459900578b200a4b8cf9713ac8fab Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 15 Sep 2010 10:00:26 -0400
Subject: sctp: Do not reset the packet during sctp_packet_config().

sctp_packet_config() is called when getting the packet ready
for appending of chunks.  The function should not touch the
current state, since it's possible to ping-pong between two
transports when sending, and that can result packet corruption
followed by skb overlfow crash.

Reported-by: Thomas Dreibholz <dreibh@iem.uni-due.de>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sctp/output.c b/net/sctp/output.c
index a646681..bcc4590 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -92,7 +92,6 @@ struct sctp_packet *sctp_packet_config(struct sctp_packet *packet,
 	SCTP_DEBUG_PRINTK("%s: packet:%p vtag:0x%x\n", __func__,
 			  packet, vtag);
 
-	sctp_packet_reset(packet);
 	packet->vtag = vtag;
 
 	if (ecn_capable && sctp_packet_empty(packet)) {
-- 
cgit v0.10.2


From f0f9deae9e7c421fa0c1c627beb8e174325e1ba7 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 17 Sep 2010 16:55:03 -0700
Subject: netpoll: Disable IRQ around RCU dereference in netpoll_rx

We cannot use rcu_dereference_bh safely in netpoll_rx as we may
be called with IRQs disabled.  We could however simply disable
IRQs as that too causes BH to be disabled and is safe in either
case.

Thanks to John Linville for discovering this bug and providing
a patch.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 791d510..50d8009 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -63,20 +63,20 @@ static inline bool netpoll_rx(struct sk_buff *skb)
 	unsigned long flags;
 	bool ret = false;
 
-	rcu_read_lock_bh();
+	local_irq_save(flags);
 	npinfo = rcu_dereference_bh(skb->dev->npinfo);
 
 	if (!npinfo || (list_empty(&npinfo->rx_np) && !npinfo->rx_flags))
 		goto out;
 
-	spin_lock_irqsave(&npinfo->rx_lock, flags);
+	spin_lock(&npinfo->rx_lock);
 	/* check rx_flags again with the lock held */
 	if (npinfo->rx_flags && __netpoll_rx(skb))
 		ret = true;
-	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+	spin_unlock(&npinfo->rx_lock);
 
 out:
-	rcu_read_unlock_bh();
+	local_irq_restore(flags);
 	return ret;
 }
 
-- 
cgit v0.10.2


From 4e8cec269dd9e823804141f25ce37c23e72d3c12 Mon Sep 17 00:00:00 2001
From: "Sosnowski, Maciej" <maciej.sosnowski@intel.com>
Date: Thu, 16 Sep 2010 06:02:26 +0000
Subject: dca: disable dca on IOAT ver.3.0 multiple-IOH platforms

Direct Cache Access is not supported on IOAT ver.3.0 multiple-IOH platforms.
This patch blocks registering of dca providers when multiple IOH detected with IOAT ver.3.0.

Signed-off-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c
index 8661c84..b98c676 100644
--- a/drivers/dca/dca-core.c
+++ b/drivers/dca/dca-core.c
@@ -39,6 +39,10 @@ static DEFINE_SPINLOCK(dca_lock);
 
 static LIST_HEAD(dca_domains);
 
+static BLOCKING_NOTIFIER_HEAD(dca_provider_chain);
+
+static int dca_providers_blocked;
+
 static struct pci_bus *dca_pci_rc_from_dev(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
@@ -70,6 +74,60 @@ static void dca_free_domain(struct dca_domain *domain)
 	kfree(domain);
 }
 
+static int dca_provider_ioat_ver_3_0(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return ((pdev->vendor == PCI_VENDOR_ID_INTEL) &&
+		((pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG0) ||
+		(pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG1) ||
+		(pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG2) ||
+		(pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG3) ||
+		(pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG4) ||
+		(pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG5) ||
+		(pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG6) ||
+		(pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG7)));
+}
+
+static void unregister_dca_providers(void)
+{
+	struct dca_provider *dca, *_dca;
+	struct list_head unregistered_providers;
+	struct dca_domain *domain;
+	unsigned long flags;
+
+	blocking_notifier_call_chain(&dca_provider_chain,
+				     DCA_PROVIDER_REMOVE, NULL);
+
+	INIT_LIST_HEAD(&unregistered_providers);
+
+	spin_lock_irqsave(&dca_lock, flags);
+
+	if (list_empty(&dca_domains)) {
+		spin_unlock_irqrestore(&dca_lock, flags);
+		return;
+	}
+
+	/* at this point only one domain in the list is expected */
+	domain = list_first_entry(&dca_domains, struct dca_domain, node);
+	if (!domain)
+		return;
+
+	list_for_each_entry_safe(dca, _dca, &domain->dca_providers, node) {
+		list_del(&dca->node);
+		list_add(&dca->node, &unregistered_providers);
+	}
+
+	dca_free_domain(domain);
+
+	spin_unlock_irqrestore(&dca_lock, flags);
+
+	list_for_each_entry_safe(dca, _dca, &unregistered_providers, node) {
+		dca_sysfs_remove_provider(dca);
+		list_del(&dca->node);
+	}
+}
+
 static struct dca_domain *dca_find_domain(struct pci_bus *rc)
 {
 	struct dca_domain *domain;
@@ -90,9 +148,13 @@ static struct dca_domain *dca_get_domain(struct device *dev)
 	domain = dca_find_domain(rc);
 
 	if (!domain) {
-		domain = dca_allocate_domain(rc);
-		if (domain)
-			list_add(&domain->node, &dca_domains);
+		if (dca_provider_ioat_ver_3_0(dev) && !list_empty(&dca_domains)) {
+			dca_providers_blocked = 1;
+		} else {
+			domain = dca_allocate_domain(rc);
+			if (domain)
+				list_add(&domain->node, &dca_domains);
+		}
 	}
 
 	return domain;
@@ -293,8 +355,6 @@ void free_dca_provider(struct dca_provider *dca)
 }
 EXPORT_SYMBOL_GPL(free_dca_provider);
 
-static BLOCKING_NOTIFIER_HEAD(dca_provider_chain);
-
 /**
  * register_dca_provider - register a dca provider
  * @dca - struct created by alloc_dca_provider()
@@ -306,6 +366,13 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev)
 	unsigned long flags;
 	struct dca_domain *domain;
 
+	spin_lock_irqsave(&dca_lock, flags);
+	if (dca_providers_blocked) {
+		spin_unlock_irqrestore(&dca_lock, flags);
+		return -ENODEV;
+	}
+	spin_unlock_irqrestore(&dca_lock, flags);
+
 	err = dca_sysfs_add_provider(dca, dev);
 	if (err)
 		return err;
@@ -313,7 +380,13 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev)
 	spin_lock_irqsave(&dca_lock, flags);
 	domain = dca_get_domain(dev);
 	if (!domain) {
-		spin_unlock_irqrestore(&dca_lock, flags);
+		if (dca_providers_blocked) {
+			spin_unlock_irqrestore(&dca_lock, flags);
+			dca_sysfs_remove_provider(dca);
+			unregister_dca_providers();
+		} else {
+			spin_unlock_irqrestore(&dca_lock, flags);
+		}
 		return -ENODEV;
 	}
 	list_add(&dca->node, &domain->dca_providers);
-- 
cgit v0.10.2


From 04746ff1289f75af26af279eb4b0b3e231677ee4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 17 Sep 2010 22:58:08 -0700
Subject: qlcnic: dont assume NET_IP_ALIGN is 2

qlcnic driver allocates rx skbs and gives to hardware too bytes of extra
storage, allowing for corruption of kernel data.

NET_IP_ALIGN being 0 on some platforms (including x86), drivers should
not assume it's 2.

rds_ring->skb_size = rds_ring->dma_size + NET_IP_ALIGN;
...
skb = dev_alloc_skb(rds_ring->skb_size);
skb_reserve(skb, 2);
pci_map_single(pdev, skb->data, rds_ring->dma_size, PCI_DMA_FROMDEVICE);

(and rds_ring->skb_size == rds_ring->dma_size) -> bug


Because of extra alignment (1500 + 32) -> four extra bytes are available
before the struct skb_shared_info, so corruption is not noticed.

Note: this driver could use netdev_alloc_skb_ip_align()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/qlcnic/qlcnic_init.c b/drivers/net/qlcnic/qlcnic_init.c
index 75ba744..60ab753 100644
--- a/drivers/net/qlcnic/qlcnic_init.c
+++ b/drivers/net/qlcnic/qlcnic_init.c
@@ -1316,7 +1316,7 @@ qlcnic_alloc_rx_skb(struct qlcnic_adapter *adapter,
 		return -ENOMEM;
 	}
 
-	skb_reserve(skb, 2);
+	skb_reserve(skb, NET_IP_ALIGN);
 
 	dma = pci_map_single(pdev, skb->data,
 			rds_ring->dma_size, PCI_DMA_FROMDEVICE);
-- 
cgit v0.10.2


From 842c74bffcdb1d305ccd9e61e417cceae86b9963 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 20 Sep 2010 10:06:12 -0700
Subject: ip_gre: CONFIG_IPV6_MODULE support

ipv6 can be a module, we should test CONFIG_IPV6 and CONFIG_IPV6_MODULE
to enable ipv6 bits in ip_gre.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 945b20a..35c93e8 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -45,7 +45,7 @@
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
 
-#ifdef CONFIG_IPV6
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
 #include <net/ip6_route.h>
@@ -699,7 +699,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			if ((dst = rt->rt_gateway) == 0)
 				goto tx_error_icmp;
 		}
-#ifdef CONFIG_IPV6
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		else if (skb->protocol == htons(ETH_P_IPV6)) {
 			struct in6_addr *addr6;
 			int addr_type;
@@ -774,7 +774,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			goto tx_error;
 		}
 	}
-#ifdef CONFIG_IPV6
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	else if (skb->protocol == htons(ETH_P_IPV6)) {
 		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
 
@@ -850,7 +850,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	if ((iph->ttl = tiph->ttl) == 0) {
 		if (skb->protocol == htons(ETH_P_IP))
 			iph->ttl = old_iph->ttl;
-#ifdef CONFIG_IPV6
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		else if (skb->protocol == htons(ETH_P_IPV6))
 			iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
 #endif
-- 
cgit v0.10.2


From df6d02300f7c2fbd0fbe626d819c8e5237d72c62 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 17 Sep 2010 00:38:25 +0200
Subject: wext: fix potential private ioctl memory content leak

When a driver doesn't fill the entire buffer, old
heap contents may remain, and if it also doesn't
update the length properly, this old heap content
will be copied back to userspace.

It is very unlikely that this happens in any of
the drivers using private ioctls since it would
show up as junk being reported by iwpriv, but it
seems better to be safe here, so use kzalloc.

Reported-by: Jeff Mahoney <jeffm@suse.com>
Cc: stable@kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/net/wireless/wext-priv.c b/net/wireless/wext-priv.c
index 3feb28e..674d426 100644
--- a/net/wireless/wext-priv.c
+++ b/net/wireless/wext-priv.c
@@ -152,7 +152,7 @@ static int ioctl_private_iw_point(struct iw_point *iwp, unsigned int cmd,
 	} else if (!iwp->pointer)
 		return -EFAULT;
 
-	extra = kmalloc(extra_size, GFP_KERNEL);
+	extra = kzalloc(extra_size, GFP_KERNEL);
 	if (!extra)
 		return -ENOMEM;
 
-- 
cgit v0.10.2


From 8444cf712c5f71845cba9dc30d8f530ff0d5ff83 Mon Sep 17 00:00:00 2001
From: Thomas Egerer <thomas.egerer@secunet.com>
Date: Mon, 20 Sep 2010 11:11:38 -0700
Subject: xfrm: Allow different selector family in temporary state

The family parameter xfrm_state_find is used to find a state matching a
certain policy. This value is set to the template's family
(encap_family) right before xfrm_state_find is called.
The family parameter is however also used to construct a temporary state
in xfrm_state_find itself which is wrong for inter-family scenarios
because it produces a selector for the wrong family. Since this selector
is included in the xfrm_user_acquire structure, user space programs
misinterpret IPv6 addresses as IPv4 and vice versa.
This patch splits up the original init_tempsel function into a part that
initializes the selector respectively the props and id of the temporary
state, to allow for differing ip address families whithin the state.

Signed-off-by: Thomas Egerer <thomas.egerer@secunet.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index fc8f36d..4f53532 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -298,8 +298,8 @@ struct xfrm_state_afinfo {
 	const struct xfrm_type	*type_map[IPPROTO_MAX];
 	struct xfrm_mode	*mode_map[XFRM_MODE_MAX];
 	int			(*init_flags)(struct xfrm_state *x);
-	void			(*init_tempsel)(struct xfrm_state *x, struct flowi *fl,
-						struct xfrm_tmpl *tmpl,
+	void			(*init_tempsel)(struct xfrm_selector *sel, struct flowi *fl);
+	void			(*init_temprop)(struct xfrm_state *x, struct xfrm_tmpl *tmpl,
 						xfrm_address_t *daddr, xfrm_address_t *saddr);
 	int			(*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n);
 	int			(*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n);
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 1ef1366..4794762 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -21,21 +21,25 @@ static int xfrm4_init_flags(struct xfrm_state *x)
 }
 
 static void
-__xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
-		     struct xfrm_tmpl *tmpl,
-		     xfrm_address_t *daddr, xfrm_address_t *saddr)
+__xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl)
+{
+	sel->daddr.a4 = fl->fl4_dst;
+	sel->saddr.a4 = fl->fl4_src;
+	sel->dport = xfrm_flowi_dport(fl);
+	sel->dport_mask = htons(0xffff);
+	sel->sport = xfrm_flowi_sport(fl);
+	sel->sport_mask = htons(0xffff);
+	sel->family = AF_INET;
+	sel->prefixlen_d = 32;
+	sel->prefixlen_s = 32;
+	sel->proto = fl->proto;
+	sel->ifindex = fl->oif;
+}
+
+static void
+xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl,
+		   xfrm_address_t *daddr, xfrm_address_t *saddr)
 {
-	x->sel.daddr.a4 = fl->fl4_dst;
-	x->sel.saddr.a4 = fl->fl4_src;
-	x->sel.dport = xfrm_flowi_dport(fl);
-	x->sel.dport_mask = htons(0xffff);
-	x->sel.sport = xfrm_flowi_sport(fl);
-	x->sel.sport_mask = htons(0xffff);
-	x->sel.family = AF_INET;
-	x->sel.prefixlen_d = 32;
-	x->sel.prefixlen_s = 32;
-	x->sel.proto = fl->proto;
-	x->sel.ifindex = fl->oif;
 	x->id = tmpl->id;
 	if (x->id.daddr.a4 == 0)
 		x->id.daddr.a4 = daddr->a4;
@@ -70,6 +74,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.owner			= THIS_MODULE,
 	.init_flags		= xfrm4_init_flags,
 	.init_tempsel		= __xfrm4_init_tempsel,
+	.init_temprop		= xfrm4_init_temprop,
 	.output			= xfrm4_output,
 	.extract_input		= xfrm4_extract_input,
 	.extract_output		= xfrm4_extract_output,
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index f417b77..a67575d 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -20,23 +20,27 @@
 #include <net/addrconf.h>
 
 static void
-__xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
-		     struct xfrm_tmpl *tmpl,
-		     xfrm_address_t *daddr, xfrm_address_t *saddr)
+__xfrm6_init_tempsel(struct xfrm_selector *sel, struct flowi *fl)
 {
 	/* Initialize temporary selector matching only
 	 * to current session. */
-	ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst);
-	ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src);
-	x->sel.dport = xfrm_flowi_dport(fl);
-	x->sel.dport_mask = htons(0xffff);
-	x->sel.sport = xfrm_flowi_sport(fl);
-	x->sel.sport_mask = htons(0xffff);
-	x->sel.family = AF_INET6;
-	x->sel.prefixlen_d = 128;
-	x->sel.prefixlen_s = 128;
-	x->sel.proto = fl->proto;
-	x->sel.ifindex = fl->oif;
+	ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst);
+	ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src);
+	sel->dport = xfrm_flowi_dport(fl);
+	sel->dport_mask = htons(0xffff);
+	sel->sport = xfrm_flowi_sport(fl);
+	sel->sport_mask = htons(0xffff);
+	sel->family = AF_INET6;
+	sel->prefixlen_d = 128;
+	sel->prefixlen_s = 128;
+	sel->proto = fl->proto;
+	sel->ifindex = fl->oif;
+}
+
+static void
+xfrm6_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl,
+		   xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
 	x->id = tmpl->id;
 	if (ipv6_addr_any((struct in6_addr*)&x->id.daddr))
 		memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
@@ -168,6 +172,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.eth_proto		= htons(ETH_P_IPV6),
 	.owner			= THIS_MODULE,
 	.init_tempsel		= __xfrm6_init_tempsel,
+	.init_temprop		= xfrm6_init_temprop,
 	.tmpl_sort		= __xfrm6_tmpl_sort,
 	.state_sort		= __xfrm6_state_sort,
 	.output			= xfrm6_output,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 2b3ed7a..cbab6e1 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1175,9 +1175,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
 		    tmpl->mode == XFRM_MODE_BEET) {
 			remote = &tmpl->id.daddr;
 			local = &tmpl->saddr;
-			family = tmpl->encap_family;
-			if (xfrm_addr_any(local, family)) {
-				error = xfrm_get_saddr(net, &tmp, remote, family);
+			if (xfrm_addr_any(local, tmpl->encap_family)) {
+				error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family);
 				if (error)
 					goto fail;
 				local = &tmp;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 5208b12f..eb96ce5 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -656,15 +656,23 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
 EXPORT_SYMBOL(xfrm_sad_getinfo);
 
 static int
-xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
-		  struct xfrm_tmpl *tmpl,
-		  xfrm_address_t *daddr, xfrm_address_t *saddr,
-		  unsigned short family)
+xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl,
+		    struct xfrm_tmpl *tmpl,
+		    xfrm_address_t *daddr, xfrm_address_t *saddr,
+		    unsigned short family)
 {
 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 	if (!afinfo)
 		return -1;
-	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
+	afinfo->init_tempsel(&x->sel, fl);
+
+	if (family != tmpl->encap_family) {
+		xfrm_state_put_afinfo(afinfo);
+		afinfo = xfrm_state_get_afinfo(tmpl->encap_family);
+		if (!afinfo)
+			return -1;
+	}
+	afinfo->init_temprop(x, tmpl, daddr, saddr);
 	xfrm_state_put_afinfo(afinfo);
 	return 0;
 }
@@ -790,37 +798,38 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 	int error = 0;
 	struct xfrm_state *best = NULL;
 	u32 mark = pol->mark.v & pol->mark.m;
+	unsigned short encap_family = tmpl->encap_family;
 
 	to_put = NULL;
 
 	spin_lock_bh(&xfrm_state_lock);
-	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, family);
+	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
 	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) {
-		if (x->props.family == family &&
+		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
-		    xfrm_state_addr_check(x, daddr, saddr, family) &&
+		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
 		    tmpl->mode == x->props.mode &&
 		    tmpl->id.proto == x->id.proto &&
 		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
-			xfrm_state_look_at(pol, x, fl, family, daddr, saddr,
+			xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr,
 					   &best, &acquire_in_progress, &error);
 	}
 	if (best)
 		goto found;
 
-	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family);
+	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
 	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) {
-		if (x->props.family == family &&
+		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
-		    xfrm_state_addr_check(x, daddr, saddr, family) &&
+		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
 		    tmpl->mode == x->props.mode &&
 		    tmpl->id.proto == x->id.proto &&
 		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
-			xfrm_state_look_at(pol, x, fl, family, daddr, saddr,
+			xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr,
 					   &best, &acquire_in_progress, &error);
 	}
 
@@ -829,7 +838,7 @@ found:
 	if (!x && !error && !acquire_in_progress) {
 		if (tmpl->id.spi &&
 		    (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi,
-					      tmpl->id.proto, family)) != NULL) {
+					      tmpl->id.proto, encap_family)) != NULL) {
 			to_put = x0;
 			error = -EEXIST;
 			goto out;
@@ -839,9 +848,9 @@ found:
 			error = -ENOMEM;
 			goto out;
 		}
-		/* Initialize temporary selector matching only
+		/* Initialize temporary state matching only
 		 * to current session. */
-		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
+		xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
 		memcpy(&x->mark, &pol->mark, sizeof(x->mark));
 
 		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
@@ -856,10 +865,10 @@ found:
 			x->km.state = XFRM_STATE_ACQ;
 			list_add(&x->km.all, &net->xfrm.state_all);
 			hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
-			h = xfrm_src_hash(net, daddr, saddr, family);
+			h = xfrm_src_hash(net, daddr, saddr, encap_family);
 			hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
 			if (x->id.spi) {
-				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, family);
+				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
 				hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
 			}
 			x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
-- 
cgit v0.10.2


From 9828e6e6e3f19efcb476c567b9999891d051f52f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 20 Sep 2010 15:40:35 -0700
Subject: rose: Fix signedness issues wrt. digi count.

Just use explicit casts, since we really can't change the
types of structures exported to userspace which have been
around for 15 years or so.

Reported-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 8e45e76..d952e7e 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -679,7 +679,7 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1)
 		return -EINVAL;
 
-	if (addr->srose_ndigis > ROSE_MAX_DIGIS)
+	if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS)
 		return -EINVAL;
 
 	if ((dev = rose_dev_get(&addr->srose_addr)) == NULL) {
@@ -739,7 +739,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
 	if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1)
 		return -EINVAL;
 
-	if (addr->srose_ndigis > ROSE_MAX_DIGIS)
+	if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS)
 		return -EINVAL;
 
 	/* Source + Destination digis should not exceed ROSE_MAX_DIGIS */
-- 
cgit v0.10.2


From a4d258036ed9b2a1811c3670c6099203a0f284a0 Mon Sep 17 00:00:00 2001
From: Tom Marshall <tdm.code@gmail.com>
Date: Mon, 20 Sep 2010 15:42:05 -0700
Subject: tcp: Fix race in tcp_poll

If a RST comes in immediately after checking sk->sk_err, tcp_poll will
return POLLIN but not POLLOUT.  Fix this by checking sk->sk_err at the end
of tcp_poll.  Additionally, ensure the correct order of operations on SMP
machines with memory barriers.

Signed-off-by: Tom Marshall <tdm.code@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3fb1428..95d75d4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -386,8 +386,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	 */
 
 	mask = 0;
-	if (sk->sk_err)
-		mask = POLLERR;
 
 	/*
 	 * POLLHUP is certainly not done right. But poll() doesn't
@@ -457,6 +455,11 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		if (tp->urg_data & TCP_URG_VALID)
 			mask |= POLLPRI;
 	}
+	/* This barrier is coupled with smp_wmb() in tcp_reset() */
+	smp_rmb();
+	if (sk->sk_err)
+		mask |= POLLERR;
+
 	return mask;
 }
 EXPORT_SYMBOL(tcp_poll);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e663b78..149e79a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4048,6 +4048,8 @@ static void tcp_reset(struct sock *sk)
 	default:
 		sk->sk_err = ECONNRESET;
 	}
+	/* This barrier is coupled with smp_rmb() in tcp_poll() */
+	smp_wmb();
 
 	if (!sock_flag(sk, SOCK_DEAD))
 		sk->sk_error_report(sk);
-- 
cgit v0.10.2


From 8df8fd27123054b02007361bd5483775db84b4a8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 20 Sep 2010 02:28:59 +0000
Subject: qlcnic: dont set skb->truesize

skb->truesize is set in core network.

Dont change it unless dealing with fragments.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/qlcnic/qlcnic_init.c b/drivers/net/qlcnic/qlcnic_init.c
index 60ab753..2c7cf0b 100644
--- a/drivers/net/qlcnic/qlcnic_init.c
+++ b/drivers/net/qlcnic/qlcnic_init.c
@@ -1404,7 +1404,6 @@ qlcnic_process_rcv(struct qlcnic_adapter *adapter,
 	if (pkt_offset)
 		skb_pull(skb, pkt_offset);
 
-	skb->truesize = skb->len + sizeof(struct sk_buff);
 	skb->protocol = eth_type_trans(skb, netdev);
 
 	napi_gro_receive(&sds_ring->napi, skb);
@@ -1466,8 +1465,6 @@ qlcnic_process_lro(struct qlcnic_adapter *adapter,
 
 	skb_put(skb, lro_length + data_offset);
 
-	skb->truesize = skb->len + sizeof(struct sk_buff) + skb_headroom(skb);
-
 	skb_pull(skb, l2_hdr_offset);
 	skb->protocol = eth_type_trans(skb, netdev);
 
@@ -1700,8 +1697,6 @@ qlcnic_process_rcv_diag(struct qlcnic_adapter *adapter,
 	if (pkt_offset)
 		skb_pull(skb, pkt_offset);
 
-	skb->truesize = skb->len + sizeof(struct sk_buff);
-
 	if (!qlcnic_check_loopback_buff(skb->data))
 		adapter->diag_cnt++;
 
-- 
cgit v0.10.2


From 7e96dc7045bff8758804b047c0dfb6868f182500 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 21 Sep 2010 13:04:04 -0700
Subject: netxen: dont set skb->truesize

skb->truesize is set in core network.

Dont change it unless dealing with fragments.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c
index cabae7b..b075a35 100644
--- a/drivers/net/netxen/netxen_nic_init.c
+++ b/drivers/net/netxen/netxen_nic_init.c
@@ -1540,7 +1540,6 @@ netxen_process_rcv(struct netxen_adapter *adapter,
 	if (pkt_offset)
 		skb_pull(skb, pkt_offset);
 
-	skb->truesize = skb->len + sizeof(struct sk_buff);
 	skb->protocol = eth_type_trans(skb, netdev);
 
 	napi_gro_receive(&sds_ring->napi, skb);
@@ -1602,8 +1601,6 @@ netxen_process_lro(struct netxen_adapter *adapter,
 
 	skb_put(skb, lro_length + data_offset);
 
-	skb->truesize = skb->len + sizeof(struct sk_buff) + skb_headroom(skb);
-
 	skb_pull(skb, l2_hdr_offset);
 	skb->protocol = eth_type_trans(skb, netdev);
 
-- 
cgit v0.10.2


From 3d13008e7345fa7a79d8f6438150dc15d6ba6e9d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 21 Sep 2010 08:47:45 +0000
Subject: ip: fix truesize mismatch in ip fragmentation

Special care should be taken when slow path is hit in ip_fragment() :

When walking through frags, we transfert truesize ownership from skb to
frags. Then if we hit a slow_path condition, we must undo this or risk
uncharging frags->truesize twice, and in the end, having negative socket
sk_wmem_alloc counter, or even freeing socket sooner than expected.

Many thanks to Nick Bowler, who provided a very clean bug report and
test program.

Thanks to Jarek for reviewing my first patch and providing a V2

While Nick bisection pointed to commit 2b85a34e911 (net: No more
expensive sock_hold()/sock_put() on each tx), underlying bug is older
(2.6.12-rc5)

A side effect is to extend work done in commit b2722b1c3a893e
(ip_fragment: also adjust skb->truesize for packets not owned by a
socket) to ipv6 as well.

Reported-and-bisected-by: Nick Bowler <nbowler@elliptictech.com>
Tested-by: Nick Bowler <nbowler@elliptictech.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Jarek Poplawski <jarkao2@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 04b6989..7649d77 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -488,9 +488,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	 * we can switch to copy when see the first bad fragment.
 	 */
 	if (skb_has_frags(skb)) {
-		struct sk_buff *frag;
+		struct sk_buff *frag, *frag2;
 		int first_len = skb_pagelen(skb);
-		int truesizes = 0;
 
 		if (first_len - hlen > mtu ||
 		    ((first_len - hlen) & 7) ||
@@ -503,18 +502,18 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 			if (frag->len > mtu ||
 			    ((frag->len & 7) && frag->next) ||
 			    skb_headroom(frag) < hlen)
-			    goto slow_path;
+				goto slow_path_clean;
 
 			/* Partially cloned skb? */
 			if (skb_shared(frag))
-				goto slow_path;
+				goto slow_path_clean;
 
 			BUG_ON(frag->sk);
 			if (skb->sk) {
 				frag->sk = skb->sk;
 				frag->destructor = sock_wfree;
 			}
-			truesizes += frag->truesize;
+			skb->truesize -= frag->truesize;
 		}
 
 		/* Everything is OK. Generate! */
@@ -524,7 +523,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		frag = skb_shinfo(skb)->frag_list;
 		skb_frag_list_init(skb);
 		skb->data_len = first_len - skb_headlen(skb);
-		skb->truesize -= truesizes;
 		skb->len = first_len;
 		iph->tot_len = htons(first_len);
 		iph->frag_off = htons(IP_MF);
@@ -576,6 +574,15 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		}
 		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
 		return err;
+
+slow_path_clean:
+		skb_walk_frags(skb, frag2) {
+			if (frag2 == frag)
+				break;
+			frag2->sk = NULL;
+			frag2->destructor = NULL;
+			skb->truesize += frag2->truesize;
+		}
 	}
 
 slow_path:
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d40b330..980912e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -639,7 +639,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 	if (skb_has_frags(skb)) {
 		int first_len = skb_pagelen(skb);
-		int truesizes = 0;
+		struct sk_buff *frag2;
 
 		if (first_len - hlen > mtu ||
 		    ((first_len - hlen) & 7) ||
@@ -651,18 +651,18 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 			if (frag->len > mtu ||
 			    ((frag->len & 7) && frag->next) ||
 			    skb_headroom(frag) < hlen)
-			    goto slow_path;
+				goto slow_path_clean;
 
 			/* Partially cloned skb? */
 			if (skb_shared(frag))
-				goto slow_path;
+				goto slow_path_clean;
 
 			BUG_ON(frag->sk);
 			if (skb->sk) {
 				frag->sk = skb->sk;
 				frag->destructor = sock_wfree;
-				truesizes += frag->truesize;
 			}
+			skb->truesize -= frag->truesize;
 		}
 
 		err = 0;
@@ -693,7 +693,6 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 		first_len = skb_pagelen(skb);
 		skb->data_len = first_len - skb_headlen(skb);
-		skb->truesize -= truesizes;
 		skb->len = first_len;
 		ipv6_hdr(skb)->payload_len = htons(first_len -
 						   sizeof(struct ipv6hdr));
@@ -756,6 +755,15 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 			      IPSTATS_MIB_FRAGFAILS);
 		dst_release(&rt->dst);
 		return err;
+
+slow_path_clean:
+		skb_walk_frags(skb, frag2) {
+			if (frag2 == frag)
+				break;
+			frag2->sk = NULL;
+			frag2->destructor = NULL;
+			skb->truesize += frag2->truesize;
+		}
 	}
 
 slow_path:
-- 
cgit v0.10.2


From d485d500cf6b13a33bc7a6c09091deea7ea603ca Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 21 Sep 2010 21:17:29 +0000
Subject: netfilter: tproxy: nf_tproxy_assign_sock() can handle tw sockets

transparent field of a socket is either inet_twsk(sk)->tw_transparent
for timewait sockets, or inet_sk(sk)->transparent for other sockets
(TCP/UDP).

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
index 5490fc3..daab8c4 100644
--- a/net/netfilter/nf_tproxy_core.c
+++ b/net/netfilter/nf_tproxy_core.c
@@ -70,7 +70,11 @@ nf_tproxy_destructor(struct sk_buff *skb)
 int
 nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
 {
-	if (inet_sk(sk)->transparent) {
+	bool transparent = (sk->sk_state == TCP_TIME_WAIT) ?
+				inet_twsk(sk)->tw_transparent :
+				inet_sk(sk)->transparent;
+
+	if (transparent) {
 		skb_orphan(skb);
 		skb->sk = sk;
 		skb->destructor = nf_tproxy_destructor;
-- 
cgit v0.10.2


From 7874896a26624214bd7c05eeba7c8ab01548b1b5 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Tue, 21 Sep 2010 21:17:30 +0000
Subject: netfilter: nf_ct_sip: default to NF_ACCEPT in sip_help_tcp()

I initially noticed this because of the compiler warning below, but it
does seem to be a valid concern in the case where ct_sip_get_header()
returns 0 in the first iteration of the while loop.

net/netfilter/nf_conntrack_sip.c: In function 'sip_help_tcp':
net/netfilter/nf_conntrack_sip.c:1379: warning: 'ret' may be used uninitialized in this function

Signed-off-by: Simon Horman <horms@verge.net.au>
[Patrick: changed NF_DROP to NF_ACCEPT]
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 53d8922..f64de95 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1376,7 +1376,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
 	unsigned int msglen, origlen;
 	const char *dptr, *end;
 	s16 diff, tdiff = 0;
-	int ret;
+	int ret = NF_ACCEPT;
 	typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust;
 
 	if (ctinfo != IP_CT_ESTABLISHED &&
-- 
cgit v0.10.2


From b46ffb854554ff939701bdd492b81558da5706fc Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 21 Sep 2010 21:17:31 +0000
Subject: netfilter: fix ipt_REJECT TCP RST routing for indev == outdev

ip_route_me_harder can't create the route cache when the outdev is the same
with the indev for the skbs whichout a valid protocol set.

__mkroute_input functions has this check:
1998         if (skb->protocol != htons(ETH_P_IP)) {
1999                 /* Not IP (i.e. ARP). Do not create route, if it is
2000                  * invalid for proxy arp. DNAT routes are always valid.
2001                  *
2002                  * Proxy arp feature have been extended to allow, ARP
2003                  * replies back to the same interface, to support
2004                  * Private VLAN switch technologies. See arp.c.
2005                  */
2006                 if (out_dev == in_dev &&
2007                     IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
2008                         err = -EINVAL;
2009                         goto cleanup;
2010                 }
2011         }

This patch gives the new skb a valid protocol to bypass this check. In order
to make ipt_REJECT work with bridges, you also need to enable ip_forward.

This patch also fixes a regression. When we used skb_copy_expand(), we
didn't have this issue stated above, as the protocol was properly set.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index b254daf..43eec80 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -112,6 +112,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	/* ip_route_me_harder expects skb->dst to be set */
 	skb_dst_set_noref(nskb, skb_dst(oldskb));
 
+	nskb->protocol = htons(ETH_P_IP);
 	if (ip_route_me_harder(nskb, addr_type))
 		goto free_nskb;
 
-- 
cgit v0.10.2


From 15cdeadaa5d76009e20c7792aed69f5a73808f97 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 21 Sep 2010 21:17:32 +0000
Subject: netfilter: fix a race in nf_ct_ext_create()

As soon as rcu_read_unlock() is called, there is no guarantee current
thread can safely derefence t pointer, rcu protected.

Fix is to copy t->alloc_size in a temporary variable.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 7dcf7a4..8d9e4c9 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -48,15 +48,17 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp)
 {
 	unsigned int off, len;
 	struct nf_ct_ext_type *t;
+	size_t alloc_size;
 
 	rcu_read_lock();
 	t = rcu_dereference(nf_ct_ext_types[id]);
 	BUG_ON(t == NULL);
 	off = ALIGN(sizeof(struct nf_ct_ext), t->align);
 	len = off + t->len;
+	alloc_size = t->alloc_size;
 	rcu_read_unlock();
 
-	*ext = kzalloc(t->alloc_size, gfp);
+	*ext = kzalloc(alloc_size, gfp);
 	if (!*ext)
 		return NULL;
 
-- 
cgit v0.10.2


From d6120b8afacec587f5feb37781bc751bc5d68a10 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 21 Sep 2010 21:17:33 +0000
Subject: netfilter: nf_nat_snmp: fix checksum calculation (v4)

Fix checksum calculation in nf_nat_snmp_basic.

Based on patches by Clark Wang <wtweeker@163.com> and
Stephen Hemminger <shemminger@vyatta.com>.

https://bugzilla.kernel.org/show_bug.cgi?id=17622

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 1679e2c..ee5f419 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -893,13 +893,15 @@ static void fast_csum(__sum16 *csum,
 	unsigned char s[4];
 
 	if (offset & 1) {
-		s[0] = s[2] = 0;
+		s[0] = ~0;
 		s[1] = ~*optr;
+		s[2] = 0;
 		s[3] = *nptr;
 	} else {
-		s[1] = s[3] = 0;
 		s[0] = ~*optr;
+		s[1] = ~0;
 		s[2] = *nptr;
+		s[3] = 0;
 	}
 
 	*csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum)));
-- 
cgit v0.10.2


From cbdd769ab9de26764bde0520a91536caa1587e13 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 21 Sep 2010 21:17:34 +0000
Subject: netfilter: nf_conntrack_defrag: check socket type before touching
 nodefrag flag

we need to check proper socket type within ipv4_conntrack_defrag
function before referencing the nodefrag flag.

For example the tun driver receive path produces skbs with
AF_UNSPEC socket type, and so current code is causing unwanted
fragmented packets going out.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index eab8de3..f3a9b42 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -66,9 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 					  const struct net_device *out,
 					  int (*okfn)(struct sk_buff *))
 {
+	struct sock *sk = skb->sk;
 	struct inet_sock *inet = inet_sk(skb->sk);
 
-	if (inet && inet->nodefrag)
+	if (sk && (sk->sk_family == PF_INET) &&
+	    inet->nodefrag)
 		return NF_ACCEPT;
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-- 
cgit v0.10.2


From 56b49f4b8f6728b91d10c556c116175051b77b60 Mon Sep 17 00:00:00 2001
From: Ollie Wild <aaw@google.com>
Date: Wed, 22 Sep 2010 05:54:54 +0000
Subject: net: Move "struct net" declaration inside the __KERNEL__ macro guard

This patch reduces namespace pollution by moving the "struct net" declaration
out of the userspace-facing portion of linux/netlink.h.  It has no impact on
the kernel.

(This came up because we have several C++ applications which use "net" as a
namespace name.)

Signed-off-by: Ollie Wild <aaw@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 59d0669..1235669 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -27,8 +27,6 @@
 
 #define MAX_LINKS 32		
 
-struct net;
-
 struct sockaddr_nl {
 	sa_family_t	nl_family;	/* AF_NETLINK	*/
 	unsigned short	nl_pad;		/* zero		*/
@@ -151,6 +149,8 @@ struct nlattr {
 #include <linux/capability.h>
 #include <linux/skbuff.h>
 
+struct net;
+
 static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
 {
 	return (struct nlmsghdr *)skb->data;
-- 
cgit v0.10.2


From ec5a32f67c603b11d68eb283d94eb89a4f6cfce1 Mon Sep 17 00:00:00 2001
From: Luca Tettamanti <kronos.it@gmail.com>
Date: Wed, 22 Sep 2010 10:41:58 +0000
Subject: atl1: fix resume

adapter->cmb.cmb is initialized when the device is opened and freed when
it's closed. Accessing it unconditionally during resume results either
in a crash (NULL pointer dereference, when the interface has not been
opened yet) or data corruption (when the interface has been used and
brought down adapter->cmb.cmb points to a deallocated memory area).

Cc: stable@kernel.org
Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>
Acked-by: Chris Snook <chris.snook@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c
index 63b9ba0..bbd6e30 100644
--- a/drivers/net/atlx/atl1.c
+++ b/drivers/net/atlx/atl1.c
@@ -2847,10 +2847,11 @@ static int atl1_resume(struct pci_dev *pdev)
 	pci_enable_wake(pdev, PCI_D3cold, 0);
 
 	atl1_reset_hw(&adapter->hw);
-	adapter->cmb.cmb->int_stats = 0;
 
-	if (netif_running(netdev))
+	if (netif_running(netdev)) {
+		adapter->cmb.cmb->int_stats = 0;
 		atl1_up(adapter);
+	}
 	netif_device_attach(netdev);
 
 	return 0;
-- 
cgit v0.10.2


From 3f5a2a713aad28480d86b0add00c68484b54febc Mon Sep 17 00:00:00 2001
From: Luca Tettamanti <kronos.it@gmail.com>
Date: Wed, 22 Sep 2010 10:42:31 +0000
Subject: atl1: zero out CMB and SBM in atl1_free_ring_resources

They are allocated in atl1_setup_ring_resources, zero out the pointers
in atl1_free_ring_resources (like the other resources).

Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>
Acked-by: Chris Snook <chris.snook@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c
index bbd6e30..c73be28 100644
--- a/drivers/net/atlx/atl1.c
+++ b/drivers/net/atlx/atl1.c
@@ -1251,6 +1251,12 @@ static void atl1_free_ring_resources(struct atl1_adapter *adapter)
 
 	rrd_ring->desc = NULL;
 	rrd_ring->dma = 0;
+
+	adapter->cmb.dma = 0;
+	adapter->cmb.cmb = NULL;
+
+	adapter->smb.dma = 0;
+	adapter->smb.smb = NULL;
 }
 
 static void atl1_setup_mac_ctrl(struct atl1_adapter *adapter)
-- 
cgit v0.10.2


From 94e2238969e89f5112297ad2a00103089dde7e8f Mon Sep 17 00:00:00 2001
From: Ulrich Weber <uweber@astaro.com>
Date: Wed, 22 Sep 2010 06:45:11 +0000
Subject: xfrm4: strip ECN bits from tos field

otherwise ECT(1) bit will get interpreted as RTO_ONLINK
and routing will fail with XfrmOutBundleGenError.

Signed-off-by: Ulrich Weber <uweber@astaro.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 869078d..a580349 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -61,7 +61,7 @@ static int xfrm4_get_saddr(struct net *net,
 
 static int xfrm4_get_tos(struct flowi *fl)
 {
-	return fl->fl4_tos;
+	return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */
 }
 
 static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
-- 
cgit v0.10.2


From 8395ae8303255b31a8625035fc98391c88b0c257 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Wed, 22 Sep 2010 17:15:08 +0000
Subject: e1000e: 82577/8/9 issues with device in Sx

When going to Sx, disable gigabit in PHY (e1000_oem_bits_config_ich8lan)
in addition to the MAC before configuring PHY wakeup otherwise the PHY
configuration writes might be missed.  Also write the LED configuration
and SMBus address to the PHY registers (e1000_oem_bits_config_ich8lan and
e1000_write_smbus_addr, respectively).  The reset is no longer needed
since re-auto-negotiation is forced in e1000_oem_bits_config_ich8lan and
leaving it in causes issues with auto-negotiating the link.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index 63930d1..822de48 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -125,6 +125,7 @@
 
 /* SMBus Address Phy Register */
 #define HV_SMB_ADDR            PHY_REG(768, 26)
+#define HV_SMB_ADDR_MASK       0x007F
 #define HV_SMB_ADDR_PEC_EN     0x0200
 #define HV_SMB_ADDR_VALID      0x0080
 
@@ -895,6 +896,34 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw)
 }
 
 /**
+ *  e1000_write_smbus_addr - Write SMBus address to PHY needed during Sx states
+ *  @hw: pointer to the HW structure
+ *
+ *  Assumes semaphore already acquired.
+ *
+ **/
+static s32 e1000_write_smbus_addr(struct e1000_hw *hw)
+{
+	u16 phy_data;
+	u32 strap = er32(STRAP);
+	s32 ret_val = 0;
+
+	strap &= E1000_STRAP_SMBUS_ADDRESS_MASK;
+
+	ret_val = e1000_read_phy_reg_hv_locked(hw, HV_SMB_ADDR, &phy_data);
+	if (ret_val)
+		goto out;
+
+	phy_data &= ~HV_SMB_ADDR_MASK;
+	phy_data |= (strap >> E1000_STRAP_SMBUS_ADDRESS_SHIFT);
+	phy_data |= HV_SMB_ADDR_PEC_EN | HV_SMB_ADDR_VALID;
+	ret_val = e1000_write_phy_reg_hv_locked(hw, HV_SMB_ADDR, phy_data);
+
+out:
+	return ret_val;
+}
+
+/**
  *  e1000_sw_lcd_config_ich8lan - SW-based LCD Configuration
  *  @hw:   pointer to the HW structure
  *
@@ -970,12 +999,7 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw)
 		 * When both NVM bits are cleared, SW will configure
 		 * them instead.
 		 */
-		data = er32(STRAP);
-		data &= E1000_STRAP_SMBUS_ADDRESS_MASK;
-		reg_data = data >> E1000_STRAP_SMBUS_ADDRESS_SHIFT;
-		reg_data |= HV_SMB_ADDR_PEC_EN | HV_SMB_ADDR_VALID;
-		ret_val = e1000_write_phy_reg_hv_locked(hw, HV_SMB_ADDR,
-							reg_data);
+		ret_val = e1000_write_smbus_addr(hw);
 		if (ret_val)
 			goto out;
 
@@ -3460,13 +3484,20 @@ void e1000e_gig_downshift_workaround_ich8lan(struct e1000_hw *hw)
 void e1000e_disable_gig_wol_ich8lan(struct e1000_hw *hw)
 {
 	u32 phy_ctrl;
+	s32 ret_val;
 
 	phy_ctrl = er32(PHY_CTRL);
 	phy_ctrl |= E1000_PHY_CTRL_D0A_LPLU | E1000_PHY_CTRL_GBE_DISABLE;
 	ew32(PHY_CTRL, phy_ctrl);
 
-	if (hw->mac.type >= e1000_pchlan)
-		e1000_phy_hw_reset_ich8lan(hw);
+	if (hw->mac.type >= e1000_pchlan) {
+		e1000_oem_bits_config_ich8lan(hw, true);
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return;
+		e1000_write_smbus_addr(hw);
+		hw->phy.ops.release(hw);
+	}
 }
 
 /**
-- 
cgit v0.10.2


From 87fb7410cd8d4396dee0155526568645adba3b99 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Wed, 22 Sep 2010 17:15:33 +0000
Subject: e1000e: 82579 SMBus address and LEDs incorrect after device reset

Since the hardware is prevented from performing automatic PHY configuration
(the driver does it instead), the OEM_WRITE_ENABLE bit in the EXTCNF_CTRL
register will not get cleared preventing the SMBus address and the LED
configuration to be written to the PHY registers.  On 82579, do not check
the OEM_WRITE_ENABLE bit.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index 822de48..fc8c3ce 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -990,9 +990,9 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw)
 	cnf_base_addr = data & E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK;
 	cnf_base_addr >>= E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT;
 
-	if (!(data & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE) &&
-	    ((hw->mac.type == e1000_pchlan) ||
-	     (hw->mac.type == e1000_pch2lan))) {
+	if ((!(data & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE) &&
+	    (hw->mac.type == e1000_pchlan)) ||
+	     (hw->mac.type == e1000_pch2lan)) {
 		/*
 		 * HW configures the SMBus address and LEDs when the
 		 * OEM and LCD Write Enable bits are set in the NVM.
-- 
cgit v0.10.2


From 5f3eed6fe0e36e4b56c8dd9160241a868ee0de2a Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Wed, 22 Sep 2010 17:15:54 +0000
Subject: e1000e: 82566DC fails to get link

Two recent patches to cleanup the reset[1] and initial PHY configuration[2]
code paths for ICH/PCH devices inadvertently left out a 10msec delay and
device ID check respectively which are necessary for the 82566DC (device id
0x104b) to be configured properly, otherwise it will not get link.

[1] commit e98cac447cc1cc418dff1d610a5c79c4f2bdec7f
[2] commit 3f0c16e84438d657d29446f85fe375794a93f159

CC: stable@kernel.org
Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index fc8c3ce..6f9cb0d 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -932,7 +932,6 @@ out:
  **/
 static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw)
 {
-	struct e1000_adapter *adapter = hw->adapter;
 	struct e1000_phy_info *phy = &hw->phy;
 	u32 i, data, cnf_size, cnf_base_addr, sw_cfg_mask;
 	s32 ret_val = 0;
@@ -950,7 +949,8 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw)
 		if (phy->type != e1000_phy_igp_3)
 			return ret_val;
 
-		if (adapter->pdev->device == E1000_DEV_ID_ICH8_IGP_AMT) {
+		if ((hw->adapter->pdev->device == E1000_DEV_ID_ICH8_IGP_AMT) ||
+		    (hw->adapter->pdev->device == E1000_DEV_ID_ICH8_IGP_C)) {
 			sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG;
 			break;
 		}
@@ -1626,6 +1626,9 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw)
 	if (e1000_check_reset_block(hw))
 		goto out;
 
+	/* Allow time for h/w to get to quiescent state after reset */
+	msleep(10);
+
 	/* Perform any necessary post-reset workarounds */
 	switch (hw->mac.type) {
 	case e1000_pchlan:
-- 
cgit v0.10.2


From 831bd2e6a6c09588fdde453ecb858f050ac1b942 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Wed, 22 Sep 2010 17:16:18 +0000
Subject: e1000e: 82579 unaccounted missed packets

On 82579, there is a hardware bug that can cause received packets to not
get transferred from the PHY to the MAC due to K1 (a power saving feature
of the PHY-MAC interconnect similar to ASPM L1).  Since the MAC controls
the accounting of missed packets, these will go unnoticed.  Workaround the
issue by setting the K1 beacon duration according to the link speed.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/e1000e/hw.h b/drivers/net/e1000e/hw.h
index 66ed08f..ba302a5 100644
--- a/drivers/net/e1000e/hw.h
+++ b/drivers/net/e1000e/hw.h
@@ -57,6 +57,7 @@ enum e1e_registers {
 	E1000_SCTL     = 0x00024, /* SerDes Control - RW */
 	E1000_FCAL     = 0x00028, /* Flow Control Address Low - RW */
 	E1000_FCAH     = 0x0002C, /* Flow Control Address High -RW */
+	E1000_FEXTNVM4 = 0x00024, /* Future Extended NVM 4 - RW */
 	E1000_FEXTNVM  = 0x00028, /* Future Extended NVM - RW */
 	E1000_FCT      = 0x00030, /* Flow Control Type - RW */
 	E1000_VET      = 0x00038, /* VLAN Ether Type - RW */
diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index 6f9cb0d..89b1e1a 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -105,6 +105,10 @@
 #define E1000_FEXTNVM_SW_CONFIG		1
 #define E1000_FEXTNVM_SW_CONFIG_ICH8M (1 << 27) /* Bit redefined for ICH8M :/ */
 
+#define E1000_FEXTNVM4_BEACON_DURATION_MASK    0x7
+#define E1000_FEXTNVM4_BEACON_DURATION_8USEC   0x7
+#define E1000_FEXTNVM4_BEACON_DURATION_16USEC  0x3
+
 #define PCIE_ICH8_SNOOP_ALL		PCIE_NO_SNOOP_ALL
 
 #define E1000_ICH_RAR_ENTRIES		7
@@ -238,6 +242,7 @@ static s32  e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link);
 static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw);
 static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw);
 static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw);
+static s32 e1000_k1_workaround_lv(struct e1000_hw *hw);
 
 static inline u16 __er16flash(struct e1000_hw *hw, unsigned long reg)
 {
@@ -653,6 +658,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 			goto out;
 	}
 
+	if (hw->mac.type == e1000_pch2lan) {
+		ret_val = e1000_k1_workaround_lv(hw);
+		if (ret_val)
+			goto out;
+	}
+
 	/*
 	 * Check if there was DownShift, must be checked
 	 * immediately after link-up
@@ -1583,6 +1594,43 @@ out:
 }
 
 /**
+ *  e1000_k1_gig_workaround_lv - K1 Si workaround
+ *  @hw:   pointer to the HW structure
+ *
+ *  Workaround to set the K1 beacon duration for 82579 parts
+ **/
+static s32 e1000_k1_workaround_lv(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u16 status_reg = 0;
+	u32 mac_reg;
+
+	if (hw->mac.type != e1000_pch2lan)
+		goto out;
+
+	/* Set K1 beacon duration based on 1Gbps speed or otherwise */
+	ret_val = e1e_rphy(hw, HV_M_STATUS, &status_reg);
+	if (ret_val)
+		goto out;
+
+	if ((status_reg & (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE))
+	    == (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE)) {
+		mac_reg = er32(FEXTNVM4);
+		mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK;
+
+		if (status_reg & HV_M_STATUS_SPEED_1000)
+			mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC;
+		else
+			mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_16USEC;
+
+		ew32(FEXTNVM4, mac_reg);
+	}
+
+out:
+	return ret_val;
+}
+
+/**
  *  e1000_lan_init_done_ich8lan - Check for PHY config completion
  *  @hw: pointer to the HW structure
  *
-- 
cgit v0.10.2


From a1ce647378c0262fe72757f989e961b2de6460a5 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Wed, 22 Sep 2010 17:16:40 +0000
Subject: e1000e: 82579 jumbo frame workaround causing CRC errors

The subject workaround was causing CRC errors due to writing the wrong
register with updates of the RCTL register.  It was also found that the
workaround function which modifies the RCTL register was being called in
the middle of a read-modify-write operation of the RCTL register, so the
function call has been moved appropriately.  Lastly, jumbo frames must not
be allowed when CRC stripping is disabled by a module parameter because the
workaround requires the CRC be stripped.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index 89b1e1a..bb346ae 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -1475,10 +1475,6 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable)
 			goto out;
 
 		/* Enable jumbo frame workaround in the PHY */
-		e1e_rphy(hw, PHY_REG(769, 20), &data);
-		ret_val = e1e_wphy(hw, PHY_REG(769, 20), data & ~(1 << 14));
-		if (ret_val)
-			goto out;
 		e1e_rphy(hw, PHY_REG(769, 23), &data);
 		data &= ~(0x7F << 5);
 		data |= (0x37 << 5);
@@ -1487,7 +1483,6 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable)
 			goto out;
 		e1e_rphy(hw, PHY_REG(769, 16), &data);
 		data &= ~(1 << 13);
-		data |= (1 << 12);
 		ret_val = e1e_wphy(hw, PHY_REG(769, 16), data);
 		if (ret_val)
 			goto out;
@@ -1512,7 +1507,7 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable)
 
 		mac_reg = er32(RCTL);
 		mac_reg &= ~E1000_RCTL_SECRC;
-		ew32(FFLT_DBG, mac_reg);
+		ew32(RCTL, mac_reg);
 
 		ret_val = e1000e_read_kmrn_reg(hw,
 						E1000_KMRNCTRLSTA_CTRL_OFFSET,
@@ -1538,17 +1533,12 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable)
 			goto out;
 
 		/* Write PHY register values back to h/w defaults */
-		e1e_rphy(hw, PHY_REG(769, 20), &data);
-		ret_val = e1e_wphy(hw, PHY_REG(769, 20), data & ~(1 << 14));
-		if (ret_val)
-			goto out;
 		e1e_rphy(hw, PHY_REG(769, 23), &data);
 		data &= ~(0x7F << 5);
 		ret_val = e1e_wphy(hw, PHY_REG(769, 23), data);
 		if (ret_val)
 			goto out;
 		e1e_rphy(hw, PHY_REG(769, 16), &data);
-		data &= ~(1 << 12);
 		data |= (1 << 13);
 		ret_val = e1e_wphy(hw, PHY_REG(769, 16), data);
 		if (ret_val)
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 2b8ef44..e561d15 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -2704,6 +2704,16 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter)
 	u32 psrctl = 0;
 	u32 pages = 0;
 
+	/* Workaround Si errata on 82579 - configure jumbo frame flow */
+	if (hw->mac.type == e1000_pch2lan) {
+		s32 ret_val;
+
+		if (adapter->netdev->mtu > ETH_DATA_LEN)
+			ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, true);
+		else
+			ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, false);
+	}
+
 	/* Program MC offset vector base */
 	rctl = er32(RCTL);
 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
@@ -2744,16 +2754,6 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter)
 		e1e_wphy(hw, 22, phy_data);
 	}
 
-	/* Workaround Si errata on 82579 - configure jumbo frame flow */
-	if (hw->mac.type == e1000_pch2lan) {
-		s32 ret_val;
-
-		if (rctl & E1000_RCTL_LPE)
-			ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, true);
-		else
-			ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, false);
-	}
-
 	/* Setup buffer sizes */
 	rctl &= ~E1000_RCTL_SZ_4096;
 	rctl |= E1000_RCTL_BSEX;
@@ -4833,6 +4833,15 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
 		return -EINVAL;
 	}
 
+	/* Jumbo frame workaround on 82579 requires CRC be stripped */
+	if ((adapter->hw.mac.type == e1000_pch2lan) &&
+	    !(adapter->flags2 & FLAG2_CRC_STRIPPING) &&
+	    (new_mtu > ETH_DATA_LEN)) {
+		e_err("Jumbo Frames not supported on 82579 when CRC "
+		      "stripping is disabled.\n");
+		return -EINVAL;
+	}
+
 	/* 82573 Errata 17 */
 	if (((adapter->hw.mac.type == e1000_82573) ||
 	     (adapter->hw.mac.type == e1000_82574)) &&
-- 
cgit v0.10.2


From 605c82bab5abe0816e5e32716875c245f89f39da Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Wed, 22 Sep 2010 17:17:01 +0000
Subject: e1000e: 82579 do not gate auto config of PHY by hardware during
 nominal use

For non-managed versions of 82579, set the bit that prevents the hardware
from automatically configuring the PHY after resets only when the driver
performs a reset, clear the bit after resets.  This is so the hardware can
configure the PHY automatically when the part is reset in a manner that is
not controlled by the driver (e.g. in a virtual environment via PCI FLR)
otherwise the PHY will be mis-configured causing issues such as failing to
link at 1000Mbps.
For managed versions of 82579, keep the previous behavior since the
manageability firmware will handle the PHY configuration.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index bb346ae..57b5435 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -243,6 +243,7 @@ static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw);
 static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw);
 static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw);
 static s32 e1000_k1_workaround_lv(struct e1000_hw *hw);
+static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate);
 
 static inline u16 __er16flash(struct e1000_hw *hw, unsigned long reg)
 {
@@ -278,7 +279,7 @@ static inline void __ew32flash(struct e1000_hw *hw, unsigned long reg, u32 val)
 static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
 {
 	struct e1000_phy_info *phy = &hw->phy;
-	u32 ctrl;
+	u32 ctrl, fwsm;
 	s32 ret_val = 0;
 
 	phy->addr                     = 1;
@@ -300,7 +301,8 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
 	 * disabled, then toggle the LANPHYPC Value bit to force
 	 * the interconnect to PCIe mode.
 	 */
-	if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
+	fwsm = er32(FWSM);
+	if (!(fwsm & E1000_ICH_FWSM_FW_VALID)) {
 		ctrl = er32(CTRL);
 		ctrl |=  E1000_CTRL_LANPHYPC_OVERRIDE;
 		ctrl &= ~E1000_CTRL_LANPHYPC_VALUE;
@@ -309,6 +311,13 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
 		ctrl &= ~E1000_CTRL_LANPHYPC_OVERRIDE;
 		ew32(CTRL, ctrl);
 		msleep(50);
+
+		/*
+		 * Gate automatic PHY configuration by hardware on
+		 * non-managed 82579
+		 */
+		if (hw->mac.type == e1000_pch2lan)
+			e1000_gate_hw_phy_config_ich8lan(hw, true);
 	}
 
 	/*
@@ -321,6 +330,13 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
 	if (ret_val)
 		goto out;
 
+	/* Ungate automatic PHY configuration on non-managed 82579 */
+	if ((hw->mac.type == e1000_pch2lan)  &&
+	    !(fwsm & E1000_ICH_FWSM_FW_VALID)) {
+		msleep(10);
+		e1000_gate_hw_phy_config_ich8lan(hw, false);
+	}
+
 	phy->id = e1000_phy_unknown;
 	ret_val = e1000e_get_phy_id(hw);
 	if (ret_val)
@@ -567,13 +583,10 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_adapter *adapter)
 	if (mac->type == e1000_ich8lan)
 		e1000e_set_kmrn_lock_loss_workaround_ich8lan(hw, true);
 
-	/* Disable PHY configuration by hardware, config by software */
-	if (mac->type == e1000_pch2lan) {
-		u32 extcnf_ctrl = er32(EXTCNF_CTRL);
-
-		extcnf_ctrl |= E1000_EXTCNF_CTRL_GATE_PHY_CFG;
-		ew32(EXTCNF_CTRL, extcnf_ctrl);
-	}
+	/* Gate automatic PHY configuration by hardware on managed 82579 */
+	if ((mac->type == e1000_pch2lan) &&
+	    (er32(FWSM) & E1000_ICH_FWSM_FW_VALID))
+		e1000_gate_hw_phy_config_ich8lan(hw, true);
 
 	return 0;
 }
@@ -1621,6 +1634,32 @@ out:
 }
 
 /**
+ *  e1000_gate_hw_phy_config_ich8lan - disable PHY config via hardware
+ *  @hw:   pointer to the HW structure
+ *  @gate: boolean set to true to gate, false to ungate
+ *
+ *  Gate/ungate the automatic PHY configuration via hardware; perform
+ *  the configuration via software instead.
+ **/
+static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate)
+{
+	u32 extcnf_ctrl;
+
+	if (hw->mac.type != e1000_pch2lan)
+		return;
+
+	extcnf_ctrl = er32(EXTCNF_CTRL);
+
+	if (gate)
+		extcnf_ctrl |= E1000_EXTCNF_CTRL_GATE_PHY_CFG;
+	else
+		extcnf_ctrl &= ~E1000_EXTCNF_CTRL_GATE_PHY_CFG;
+
+	ew32(EXTCNF_CTRL, extcnf_ctrl);
+	return;
+}
+
+/**
  *  e1000_lan_init_done_ich8lan - Check for PHY config completion
  *  @hw: pointer to the HW structure
  *
@@ -1695,6 +1734,13 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw)
 	/* Configure the LCD with the OEM bits in NVM */
 	ret_val = e1000_oem_bits_config_ich8lan(hw, true);
 
+	/* Ungate automatic PHY configuration on non-managed 82579 */
+	if ((hw->mac.type == e1000_pch2lan) &&
+	    !(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
+		msleep(10);
+		e1000_gate_hw_phy_config_ich8lan(hw, false);
+	}
+
 out:
 	return ret_val;
 }
@@ -1711,6 +1757,11 @@ static s32 e1000_phy_hw_reset_ich8lan(struct e1000_hw *hw)
 {
 	s32 ret_val = 0;
 
+	/* Gate automatic PHY configuration by hardware on non-managed 82579 */
+	if ((hw->mac.type == e1000_pch2lan) &&
+	    !(er32(FWSM) & E1000_ICH_FWSM_FW_VALID))
+		e1000_gate_hw_phy_config_ich8lan(hw, true);
+
 	ret_val = e1000e_phy_hw_reset_generic(hw);
 	if (ret_val)
 		goto out;
@@ -2975,6 +3026,14 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw)
 		 * external PHY is reset.
 		 */
 		ctrl |= E1000_CTRL_PHY_RST;
+
+		/*
+		 * Gate automatic PHY configuration by hardware on
+		 * non-managed 82579
+		 */
+		if ((hw->mac.type == e1000_pch2lan) &&
+		    !(er32(FWSM) & E1000_ICH_FWSM_FW_VALID))
+			e1000_gate_hw_phy_config_ich8lan(hw, true);
 	}
 	ret_val = e1000_acquire_swflag_ich8lan(hw);
 	e_dbg("Issuing a global reset to ich8lan\n");
-- 
cgit v0.10.2


From f064af1e500a2bf4607706f0f458163bdb2a6ea5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 22 Sep 2010 12:43:39 +0000
Subject: net: fix a lockdep splat

We have for each socket :

One spinlock (sk_slock.slock)
One rwlock (sk_callback_lock)

Possible scenarios are :

(A) (this is used in net/sunrpc/xprtsock.c)
read_lock(&sk->sk_callback_lock) (without blocking BH)
<BH>
spin_lock(&sk->sk_slock.slock);
...
read_lock(&sk->sk_callback_lock);
...

(B)
write_lock_bh(&sk->sk_callback_lock)
stuff
write_unlock_bh(&sk->sk_callback_lock)

(C)
spin_lock_bh(&sk->sk_slock)
...
write_lock_bh(&sk->sk_callback_lock)
stuff
write_unlock_bh(&sk->sk_callback_lock)
spin_unlock_bh(&sk->sk_slock)

This (C) case conflicts with (A) :

CPU1 [A]                         CPU2 [C]
read_lock(callback_lock)
<BH>                             spin_lock_bh(slock)
<wait to spin_lock(slock)>
                                 <wait to write_lock_bh(callback_lock)>

We have one problematic (C) use case in inet_csk_listen_stop() :

local_bh_disable();
bh_lock_sock(child); // spin_lock_bh(&sk->sk_slock)
WARN_ON(sock_owned_by_user(child));
...
sock_orphan(child); // write_lock_bh(&sk->sk_callback_lock)

lockdep is not happy with this, as reported by Tetsuo Handa

It seems only way to deal with this is to use read_lock_bh(callbacklock)
everywhere.

Thanks to Jarek for pointing a bug in my first attempt and suggesting
this solution.

Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Tested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Jarek Poplawski <jarkao2@gmail.com>
Tested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/sock.c b/net/core/sock.c
index b05b9b6..ef30e9d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1351,9 +1351,9 @@ int sock_i_uid(struct sock *sk)
 {
 	int uid;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 	return uid;
 }
 EXPORT_SYMBOL(sock_i_uid);
@@ -1362,9 +1362,9 @@ unsigned long sock_i_ino(struct sock *sk)
 {
 	unsigned long ino;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 	return ino;
 }
 EXPORT_SYMBOL(sock_i_ino);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index c397524..c519939 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -43,7 +43,7 @@ void rds_tcp_state_change(struct sock *sk)
 	struct rds_connection *conn;
 	struct rds_tcp_connection *tc;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	conn = sk->sk_user_data;
 	if (conn == NULL) {
 		state_change = sk->sk_state_change;
@@ -68,7 +68,7 @@ void rds_tcp_state_change(struct sock *sk)
 			break;
 	}
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 	state_change(sk);
 }
 
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 975183f..27844f2 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -114,7 +114,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
 
 	rdsdebug("listen data ready sk %p\n", sk);
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	ready = sk->sk_user_data;
 	if (ready == NULL) { /* check for teardown race */
 		ready = sk->sk_data_ready;
@@ -131,7 +131,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
 		queue_work(rds_wq, &rds_tcp_listen_work);
 
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 	ready(sk, bytes);
 }
 
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 1aba687..e437974 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -324,7 +324,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes)
 
 	rdsdebug("data ready sk %p bytes %d\n", sk, bytes);
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	conn = sk->sk_user_data;
 	if (conn == NULL) { /* check for teardown race */
 		ready = sk->sk_data_ready;
@@ -338,7 +338,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes)
 	if (rds_tcp_read_sock(conn, GFP_ATOMIC, KM_SOFTIRQ0) == -ENOMEM)
 		queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 	ready(sk, bytes);
 }
 
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index a28b895..2f012a0 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -224,7 +224,7 @@ void rds_tcp_write_space(struct sock *sk)
 	struct rds_connection *conn;
 	struct rds_tcp_connection *tc;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	conn = sk->sk_user_data;
 	if (conn == NULL) {
 		write_space = sk->sk_write_space;
@@ -244,7 +244,7 @@ void rds_tcp_write_space(struct sock *sk)
 		queue_delayed_work(rds_wq, &conn->c_send_w, 0);
 
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 
 	/*
 	 * write_space is only called when data leaves tcp's send queue if
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index b6309db..fe9306b 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -800,7 +800,7 @@ static void xs_udp_data_ready(struct sock *sk, int len)
 	u32 _xid;
 	__be32 *xp;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	dprintk("RPC:       xs_udp_data_ready...\n");
 	if (!(xprt = xprt_from_sock(sk)))
 		goto out;
@@ -852,7 +852,7 @@ static void xs_udp_data_ready(struct sock *sk, int len)
  dropit:
 	skb_free_datagram(sk, skb);
  out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
@@ -1229,7 +1229,7 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes)
 
 	dprintk("RPC:       xs_tcp_data_ready...\n");
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	if (!(xprt = xprt_from_sock(sk)))
 		goto out;
 	if (xprt->shutdown)
@@ -1248,7 +1248,7 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes)
 		read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
 	} while (read > 0);
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 /*
@@ -1301,7 +1301,7 @@ static void xs_tcp_state_change(struct sock *sk)
 {
 	struct rpc_xprt *xprt;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	if (!(xprt = xprt_from_sock(sk)))
 		goto out;
 	dprintk("RPC:       xs_tcp_state_change client %p...\n", xprt);
@@ -1313,7 +1313,7 @@ static void xs_tcp_state_change(struct sock *sk)
 
 	switch (sk->sk_state) {
 	case TCP_ESTABLISHED:
-		spin_lock_bh(&xprt->transport_lock);
+		spin_lock(&xprt->transport_lock);
 		if (!xprt_test_and_set_connected(xprt)) {
 			struct sock_xprt *transport = container_of(xprt,
 					struct sock_xprt, xprt);
@@ -1327,7 +1327,7 @@ static void xs_tcp_state_change(struct sock *sk)
 
 			xprt_wake_pending_tasks(xprt, -EAGAIN);
 		}
-		spin_unlock_bh(&xprt->transport_lock);
+		spin_unlock(&xprt->transport_lock);
 		break;
 	case TCP_FIN_WAIT1:
 		/* The client initiated a shutdown of the socket */
@@ -1365,7 +1365,7 @@ static void xs_tcp_state_change(struct sock *sk)
 		xs_sock_mark_closed(xprt);
 	}
  out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 /**
@@ -1376,7 +1376,7 @@ static void xs_error_report(struct sock *sk)
 {
 	struct rpc_xprt *xprt;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	if (!(xprt = xprt_from_sock(sk)))
 		goto out;
 	dprintk("RPC:       %s client %p...\n"
@@ -1384,7 +1384,7 @@ static void xs_error_report(struct sock *sk)
 			__func__, xprt, sk->sk_err);
 	xprt_wake_pending_tasks(xprt, -EAGAIN);
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 static void xs_write_space(struct sock *sk)
@@ -1416,13 +1416,13 @@ static void xs_write_space(struct sock *sk)
  */
 static void xs_udp_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 
 	/* from net/core/sock.c:sock_def_write_space */
 	if (sock_writeable(sk))
 		xs_write_space(sk);
 
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 /**
@@ -1437,13 +1437,13 @@ static void xs_udp_write_space(struct sock *sk)
  */
 static void xs_tcp_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 
 	/* from net/core/stream.c:sk_stream_write_space */
 	if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
 		xs_write_space(sk);
 
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt)
-- 
cgit v0.10.2


From e0f9c4f332c99b213d4a0b7cd21dc0781ceb3d86 Mon Sep 17 00:00:00 2001
From: Ondrej Zary <linux@rainbow-software.org>
Date: Thu, 23 Sep 2010 10:59:18 +0000
Subject: de2104x: disable autonegotiation on broken hardware

At least on older 21041-AA chips (mine is rev. 11), TP duplex autonegotiation
causes the card not to work at all (link is up but no packets are transmitted).

de4x5 disables autonegotiation completely. But it seems to work on newer
(21041-PA rev. 21) so disable it only on rev<20 chips.

Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Acked-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c
index 5efa577..9d6b7e9 100644
--- a/drivers/net/tulip/de2104x.c
+++ b/drivers/net/tulip/de2104x.c
@@ -364,6 +364,8 @@ static u16 t21040_csr15[] = { 0, 0, 0x0006, 0x0000, 0x0000, };
 /* 21041 transceiver register settings: TP AUTO, BNC, AUI, TP, TP FD*/
 static u16 t21041_csr13[] = { 0xEF01, 0xEF09, 0xEF09, 0xEF01, 0xEF09, };
 static u16 t21041_csr14[] = { 0xFFFF, 0xF7FD, 0xF7FD, 0x6F3F, 0x6F3D, };
+/* If on-chip autonegotiation is broken, use half-duplex (FF3F) instead */
+static u16 t21041_csr14_brk[] = { 0xFF3F, 0xF7FD, 0xF7FD, 0x6F3F, 0x6F3D, };
 static u16 t21041_csr15[] = { 0x0008, 0x0006, 0x000E, 0x0008, 0x0008, };
 
 
@@ -1911,8 +1913,14 @@ fill_defaults:
 	for (i = 0; i < DE_MAX_MEDIA; i++) {
 		if (de->media[i].csr13 == 0xffff)
 			de->media[i].csr13 = t21041_csr13[i];
-		if (de->media[i].csr14 == 0xffff)
-			de->media[i].csr14 = t21041_csr14[i];
+		if (de->media[i].csr14 == 0xffff) {
+			/* autonegotiation is broken at least on some chip
+			   revisions - rev. 0x21 works, 0x11 does not */
+			if (de->pdev->revision < 0x20)
+				de->media[i].csr14 = t21041_csr14_brk[i];
+			else
+				de->media[i].csr14 = t21041_csr14[i];
+		}
 		if (de->media[i].csr15 == 0xffff)
 			de->media[i].csr15 = t21041_csr15[i];
 	}
-- 
cgit v0.10.2


From b0255a02351b00ca55f4eb2588d05a5db9dd1a58 Mon Sep 17 00:00:00 2001
From: Ondrej Zary <linux@rainbow-software.org>
Date: Fri, 24 Sep 2010 23:57:02 +0000
Subject: de2104x: fix power management

At least my 21041 cards come out of suspend with bus mastering disabled so
they did not work after resume(no data transferred).
After adding pci_set_master(), the driver oopsed immediately on resume -
because de_clean_rings() is called on suspend but de_init_rings() call
was missing in resume.

Also disable link (reset SIA) before sleep (de4x5 does this too).

Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Acked-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c
index 9d6b7e9..a0be7c2 100644
--- a/drivers/net/tulip/de2104x.c
+++ b/drivers/net/tulip/de2104x.c
@@ -1231,6 +1231,7 @@ static void de_adapter_sleep (struct de_private *de)
 	if (de->de21040)
 		return;
 
+	dw32(CSR13, 0); /* Reset phy */
 	pci_read_config_dword(de->pdev, PCIPM, &pmctl);
 	pmctl |= PM_Sleep;
 	pci_write_config_dword(de->pdev, PCIPM, pmctl);
@@ -2166,6 +2167,8 @@ static int de_resume (struct pci_dev *pdev)
 		dev_err(&dev->dev, "pci_enable_device failed in resume\n");
 		goto out;
 	}
+	pci_set_master(pdev);
+	de_init_rings(de);
 	de_init_hw(de);
 out_attach:
 	netif_device_attach(dev);
-- 
cgit v0.10.2


From ca9a783575d2affed30ef27a3427a7705527ddac Mon Sep 17 00:00:00 2001
From: Ondrej Zary <linux@rainbow-software.org>
Date: Sat, 25 Sep 2010 10:39:17 +0000
Subject: de2104x: fix TP link detection

Compex FreedomLine 32 PnP-PCI2 cards have only TP and BNC connectors but the
SROM contains AUI port too. When TP loses link, the driver switches to
non-existing AUI port (which reports that carrier is always present).

Connecting TP back generates LinkPass interrupt but de_media_interrupt() is
broken - it only updates the link state of currently connected media, ignoring
the fact that LinkPass and LinkFail bits of MacStatus register belong to the
TP port only (the chip documentation says that).

This patch changes de_media_interrupt() to switch media to TP when link goes
up (and media type is not locked) and also to update the link state only when
the TP port is used.

Also the NonselPortActive (and also SelPortActive) bits of SIAStatus register
need to be cleared (by writing 1) after reading or they're useless.

Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Acked-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c
index a0be7c2..9124c5c 100644
--- a/drivers/net/tulip/de2104x.c
+++ b/drivers/net/tulip/de2104x.c
@@ -243,6 +243,7 @@ enum {
 	NWayState		= (1 << 14) | (1 << 13) | (1 << 12),
 	NWayRestart		= (1 << 12),
 	NonselPortActive	= (1 << 9),
+	SelPortActive		= (1 << 8),
 	LinkFailStatus		= (1 << 2),
 	NetCxnErr		= (1 << 1),
 };
@@ -1066,6 +1067,9 @@ static void de21041_media_timer (unsigned long data)
 	unsigned int carrier;
 	unsigned long flags;
 
+	/* clear port active bits */
+	dw32(SIAStatus, NonselPortActive | SelPortActive);
+
 	carrier = (status & NetCxnErr) ? 0 : 1;
 
 	if (carrier) {
@@ -1160,14 +1164,29 @@ no_link_yet:
 static void de_media_interrupt (struct de_private *de, u32 status)
 {
 	if (status & LinkPass) {
+		/* Ignore if current media is AUI or BNC and we can't use TP */
+		if ((de->media_type == DE_MEDIA_AUI ||
+		     de->media_type == DE_MEDIA_BNC) &&
+		    (de->media_lock ||
+		     !de_ok_to_advertise(de, DE_MEDIA_TP_AUTO)))
+			return;
+		/* If current media is not TP, change it to TP */
+		if ((de->media_type == DE_MEDIA_AUI ||
+		     de->media_type == DE_MEDIA_BNC)) {
+			de->media_type = DE_MEDIA_TP_AUTO;
+			de_stop_rxtx(de);
+			de_set_media(de);
+			de_start_rxtx(de);
+		}
 		de_link_up(de);
 		mod_timer(&de->media_timer, jiffies + DE_TIMER_LINK);
 		return;
 	}
 
 	BUG_ON(!(status & LinkFail));
-
-	if (netif_carrier_ok(de->dev)) {
+	/* Mark the link as down only if current media is TP */
+	if (netif_carrier_ok(de->dev) && de->media_type != DE_MEDIA_AUI &&
+	    de->media_type != DE_MEDIA_BNC) {
 		de_link_down(de);
 		mod_timer(&de->media_timer, jiffies + DE_TIMER_NO_LINK);
 	}
-- 
cgit v0.10.2


From a3d6713fbd2ccb50898a6f88664da96a7857c039 Mon Sep 17 00:00:00 2001
From: Karl Hiramoto <karl@hiramoto.org>
Date: Thu, 23 Sep 2010 01:50:54 +0000
Subject: br2684: fix scheduling while atomic

You can't call atomic_notifier_chain_unregister() while in atomic context.

Fix, call un/register_atmdevice_notifier in module __init and __exit.

Bug report:
http://comments.gmane.org/gmane.linux.network/172603

Reported-by: Mikko Vinni <mmvinni@yahoo.com>
Tested-by: Mikko Vinni <mmvinni@yahoo.com>
Signed-off-by: Karl Hiramoto <karl@hiramoto.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 651babd..ad2b232 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -399,12 +399,6 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 			unregister_netdev(net_dev);
 			free_netdev(net_dev);
 		}
-		read_lock_irq(&devs_lock);
-		if (list_empty(&br2684_devs)) {
-			/* last br2684 device */
-			unregister_atmdevice_notifier(&atm_dev_notifier);
-		}
-		read_unlock_irq(&devs_lock);
 		return;
 	}
 
@@ -675,7 +669,6 @@ static int br2684_create(void __user *arg)
 
 	if (list_empty(&br2684_devs)) {
 		/* 1st br2684 device */
-		register_atmdevice_notifier(&atm_dev_notifier);
 		brdev->number = 1;
 	} else
 		brdev->number = BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1;
@@ -815,6 +808,7 @@ static int __init br2684_init(void)
 		return -ENOMEM;
 #endif
 	register_atm_ioctl(&br2684_ioctl_ops);
+	register_atmdevice_notifier(&atm_dev_notifier);
 	return 0;
 }
 
@@ -830,9 +824,7 @@ static void __exit br2684_exit(void)
 #endif
 
 
-	/* if not already empty */
-	if (!list_empty(&br2684_devs))
-		unregister_atmdevice_notifier(&atm_dev_notifier);
+	unregister_atmdevice_notifier(&atm_dev_notifier);
 
 	while (!list_empty(&br2684_devs)) {
 		net_dev = list_entry_brdev(br2684_devs.next);
-- 
cgit v0.10.2


From 693019e90ca45d881109d32c0c6d29adf03f6447 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Thu, 23 Sep 2010 11:19:54 +0000
Subject: net: reset skb queue mapping when rx'ing over tunnel

Reset queue mapping when an skb is reentering the stack via a tunnel.
On second pass, the queue mapping from the original device is no
longer valid.

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/dst.h b/include/net/dst.h
index 81d1413..0238650 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -242,6 +242,7 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
 	dev->stats.rx_packets++;
 	dev->stats.rx_bytes += skb->len;
 	skb->rxhash = 0;
+	skb_set_queue_mapping(skb, 0);
 	skb_dst_drop(skb);
 	nf_reset(skb);
 }
-- 
cgit v0.10.2


From 62038e4a146b97352d5911e6ede36c58d4187c3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vincent=20Stehl=C3=A9?= <vincent.stehle@laposte.net>
Date: Sun, 26 Sep 2010 18:50:05 -0700
Subject: smsc911x: Add MODULE_ALIAS()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This enables auto loading for the smsc911x ethernet driver.

Signed-off-by: Vincent Stehlé <vincent.stehle@laposte.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index 0909ae9..8150ba1 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -58,6 +58,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_VERSION(SMSC_DRV_VERSION);
+MODULE_ALIAS("platform:smsc911x");
 
 #if USE_DEBUG > 0
 static int debug = 16;
-- 
cgit v0.10.2


From 52933f052186877afd218aef7a1b2dbdb010939f Mon Sep 17 00:00:00 2001
From: Kulikov Vasiliy <segooon@gmail.com>
Date: Sat, 25 Sep 2010 23:58:00 +0000
Subject: ibm_newemac: use free_netdev(netdev) instead of kfree()

Freeing netdev without free_netdev() leads to net, tx leaks.
I might lead to dereferencing freed pointer.

The semantic match that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

@@
struct net_device* dev;
@@

-kfree(dev)
+free_netdev(dev)

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ibm_newemac/core.c b/drivers/net/ibm_newemac/core.c
index 3506fd6..519e19e 100644
--- a/drivers/net/ibm_newemac/core.c
+++ b/drivers/net/ibm_newemac/core.c
@@ -2928,7 +2928,7 @@ static int __devinit emac_probe(struct platform_device *ofdev,
 	if (dev->emac_irq != NO_IRQ)
 		irq_dispose_mapping(dev->emac_irq);
  err_free:
-	kfree(ndev);
+	free_netdev(ndev);
  err_gone:
 	/* if we were on the bootlist, remove us as we won't show up and
 	 * wake up all waiters to notify them in case they were waiting
@@ -2971,7 +2971,7 @@ static int __devexit emac_remove(struct platform_device *ofdev)
 	if (dev->emac_irq != NO_IRQ)
 		irq_dispose_mapping(dev->emac_irq);
 
-	kfree(dev->ndev);
+	free_netdev(dev->ndev);
 
 	return 0;
 }
-- 
cgit v0.10.2


From 22138d307329e1968fc698821095b87c2fd5de12 Mon Sep 17 00:00:00 2001
From: Kulikov Vasiliy <segooon@gmail.com>
Date: Sat, 25 Sep 2010 23:58:03 +0000
Subject: rionet: use free_netdev(netdev) instead of kfree()

Freeing netdev without free_netdev() leads to net, tx leaks.
I might lead to dereferencing freed pointer.

The semantic match that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

@@
struct net_device* dev;
@@

-kfree(dev)
+free_netdev(dev)

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index 07eb884..44150f2 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -384,7 +384,7 @@ static void rionet_remove(struct rio_dev *rdev)
 	free_pages((unsigned long)rionet_active, rdev->net->hport->sys_size ?
 					__ilog2(sizeof(void *)) + 4 : 0);
 	unregister_netdev(ndev);
-	kfree(ndev);
+	free_netdev(ndev);
 
 	list_for_each_entry_safe(peer, tmp, &rionet_peers, node) {
 		list_del(&peer->node);
-- 
cgit v0.10.2


From 8d879de89807d82bc4cc3e9d73609b874fa9458c Mon Sep 17 00:00:00 2001
From: Kulikov Vasiliy <segooon@gmail.com>
Date: Sat, 25 Sep 2010 23:58:06 +0000
Subject: sgiseeq: use free_netdev(netdev) instead of kfree()

Freeing netdev without free_netdev() leads to net, tx leaks.
I might lead to dereferencing freed pointer.

The semantic match that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

@@
struct net_device* dev;
@@

-kfree(dev)
+free_netdev(dev)

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/sgiseeq.c b/drivers/net/sgiseeq.c
index cc4bd8c..9265315 100644
--- a/drivers/net/sgiseeq.c
+++ b/drivers/net/sgiseeq.c
@@ -804,7 +804,7 @@ static int __devinit sgiseeq_probe(struct platform_device *pdev)
 err_out_free_page:
 	free_page((unsigned long) sp->srings);
 err_out_free_dev:
-	kfree(dev);
+	free_netdev(dev);
 
 err_out:
 	return err;
-- 
cgit v0.10.2


From bc68580d41b131396054a1a04a7df4948555ed97 Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segooon@gmail.com>
Date: Sun, 26 Sep 2010 18:56:06 -0700
Subject: s390: use free_netdev(netdev) instead of kfree()

Freeing netdev without free_netdev() leads to net, tx leaks.
I might lead to dereferencing freed pointer.

The semantic match that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

@@
struct net_device* dev;
@@

-kfree(dev)
+free_netdev(dev)

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c
index 6edf20b..2c7d2d9 100644
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c
@@ -1154,7 +1154,7 @@ static struct net_device *ctcm_init_netdevice(struct ctcm_priv *priv)
 				dev_fsm, dev_fsm_len, GFP_KERNEL);
 	if (priv->fsm == NULL) {
 		CTCMY_DBF_DEV(SETUP, dev, "init_fsm error");
-		kfree(dev);
+		free_netdev(dev);
 		return NULL;
 	}
 	fsm_newstate(priv->fsm, DEV_STATE_STOPPED);
@@ -1165,7 +1165,7 @@ static struct net_device *ctcm_init_netdevice(struct ctcm_priv *priv)
 		grp = ctcmpc_init_mpc_group(priv);
 		if (grp == NULL) {
 			MPC_DBF_DEV(SETUP, dev, "init_mpc_group error");
-			kfree(dev);
+			free_netdev(dev);
 			return NULL;
 		}
 		tasklet_init(&grp->mpc_tasklet2,
-- 
cgit v0.10.2


From 2cc6d2bf3d6195fabcf0febc192c01f99519a8f3 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Fri, 24 Sep 2010 09:55:52 +0000
Subject: ipv6: add a missing unregister_pernet_subsys call

Clean up a missing exit path in the ipv6 module init routines.  In
addrconf_init we call ipv6_addr_label_init which calls register_pernet_subsys
for the ipv6_addr_label_ops structure.  But if module loading fails, or if the
ipv6 module is removed, there is no corresponding unregister_pernet_subsys call,
which leaves a now-bogus address on the pernet_list, leading to oopses in
subsequent registrations.  This patch cleans up both the failed load path and
the unload path.  Tested by myself with good results.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

 include/net/addrconf.h |    1 +
 net/ipv6/addrconf.c    |   11 ++++++++---
 net/ipv6/addrlabel.c   |    5 +++++
 3 files changed, 14 insertions(+), 3 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 45375b4..4d40c4d 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -121,6 +121,7 @@ static inline int addrconf_finite_timeout(unsigned long timeout)
  *	IPv6 Address Label subsystem (addrlabel.c)
  */
 extern int			ipv6_addr_label_init(void);
+extern void			ipv6_addr_label_cleanup(void);
 extern void			ipv6_addr_label_rtnl_register(void);
 extern u32			ipv6_addr_label(struct net *net,
 						const struct in6_addr *addr,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ab70a3f..324fac3 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4637,10 +4637,12 @@ int __init addrconf_init(void)
 	if (err < 0) {
 		printk(KERN_CRIT "IPv6 Addrconf:"
 		       " cannot initialize default policy table: %d.\n", err);
-		return err;
+		goto out;
 	}
 
-	register_pernet_subsys(&addrconf_ops);
+	err = register_pernet_subsys(&addrconf_ops);
+	if (err < 0)
+		goto out_addrlabel;
 
 	/* The addrconf netdev notifier requires that loopback_dev
 	 * has it's ipv6 private information allocated and setup
@@ -4692,7 +4694,9 @@ errout:
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 errlo:
 	unregister_pernet_subsys(&addrconf_ops);
-
+out_addrlabel:
+	ipv6_addr_label_cleanup();
+out:
 	return err;
 }
 
@@ -4703,6 +4707,7 @@ void addrconf_cleanup(void)
 
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 	unregister_pernet_subsys(&addrconf_ops);
+	ipv6_addr_label_cleanup();
 
 	rtnl_lock();
 
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index f0e774c..8175f80 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -393,6 +393,11 @@ int __init ipv6_addr_label_init(void)
 	return register_pernet_subsys(&ipv6_addr_label_ops);
 }
 
+void ipv6_addr_label_cleanup(void)
+{
+	unregister_pernet_subsys(&ipv6_addr_label_ops);
+}
+
 static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
 	[IFAL_ADDRESS]		= { .len = sizeof(struct in6_addr), },
 	[IFAL_LABEL]		= { .len = sizeof(u32), },
-- 
cgit v0.10.2