From f9af857489cc19ee3acd0d5248dca7d243e353a5 Mon Sep 17 00:00:00 2001
From: Matheos Worku <matheos.worku@sun.com>
Date: Mon, 12 May 2008 03:10:59 -0700
Subject: niu: Determine the # of ports from the card's VPD data

Determine the number of physical ports from the card's VPD data.
Previous fix failed on Maramba platform which doesn't have the
"board-model" property. This fix uses the "model" property which
exists on all cards and Neptune based motherboards.

cstyle cleanup included.

Signed-off-by: Matheos Worku <matheos.worku@sun.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 57cfd72..918f802 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -865,7 +865,6 @@ static int link_status_1g_serdes(struct niu *np, int *link_up_p)
 	return 0;
 }
 
-
 static int link_status_10g_serdes(struct niu *np, int *link_up_p)
 {
 	unsigned long flags;
@@ -900,7 +899,6 @@ static int link_status_10g_serdes(struct niu *np, int *link_up_p)
 	return 0;
 }
 
-
 static int link_status_1g_rgmii(struct niu *np, int *link_up_p)
 {
 	struct niu_link_config *lp = &np->link_config;
@@ -957,7 +955,6 @@ out:
 	return err;
 }
 
-
 static int bcm8704_reset(struct niu *np)
 {
 	int err, limit;
@@ -1357,8 +1354,6 @@ static int mii_reset(struct niu *np)
 	return 0;
 }
 
-
-
 static int xcvr_init_1g_rgmii(struct niu *np)
 {
 	int err;
@@ -1419,7 +1414,6 @@ static int xcvr_init_1g_rgmii(struct niu *np)
 	return 0;
 }
 
-
 static int mii_init_common(struct niu *np)
 {
 	struct niu_link_config *lp = &np->link_config;
@@ -7008,31 +7002,20 @@ static int __devinit niu_phy_type_prop_decode(struct niu *np,
 	return 0;
 }
 
-/* niu board models have a trailing dash version incremented
- * with HW rev change. Need to ingnore the  dash version while
- * checking for match
- *
- * for example, for the 10G card the current vpd.board_model
- * is 501-5283-04, of which -04 is the  dash version and have
- * to be ignored
- */
-static int niu_board_model_match(struct niu *np, const char *model)
-{
-	return !strncmp(np->vpd.board_model, model, strlen(model));
-}
-
 static int niu_pci_vpd_get_nports(struct niu *np)
 {
 	int ports = 0;
 
-	if ((niu_board_model_match(np, NIU_QGC_LP_BM_STR)) ||
-	    (niu_board_model_match(np, NIU_QGC_PEM_BM_STR)) ||
-	    (niu_board_model_match(np, NIU_ALONSO_BM_STR))) {
+	if ((!strcmp(np->vpd.model, NIU_QGC_LP_MDL_STR)) ||
+	    (!strcmp(np->vpd.model, NIU_QGC_PEM_MDL_STR)) ||
+	    (!strcmp(np->vpd.model, NIU_MARAMBA_MDL_STR)) ||
+	    (!strcmp(np->vpd.model, NIU_KIMI_MDL_STR)) ||
+	    (!strcmp(np->vpd.model, NIU_ALONSO_MDL_STR))) {
 		ports = 4;
-	} else if ((niu_board_model_match(np, NIU_2XGF_LP_BM_STR)) ||
-		   (niu_board_model_match(np, NIU_2XGF_PEM_BM_STR)) ||
-		   (niu_board_model_match(np, NIU_FOXXY_BM_STR)) ||
-		   (niu_board_model_match(np, NIU_2XGF_MRVL_BM_STR))) {
+	} else if ((!strcmp(np->vpd.model, NIU_2XGF_LP_MDL_STR)) ||
+		   (!strcmp(np->vpd.model, NIU_2XGF_PEM_MDL_STR)) ||
+		   (!strcmp(np->vpd.model, NIU_FOXXY_MDL_STR)) ||
+		   (!strcmp(np->vpd.model, NIU_2XGF_MRVL_MDL_STR))) {
 		ports = 2;
 	}
 
@@ -7053,8 +7036,8 @@ static void __devinit niu_pci_vpd_validate(struct niu *np)
 		return;
 	}
 
-	if (!strcmp(np->vpd.model, "SUNW,CP3220") ||
-	    !strcmp(np->vpd.model, "SUNW,CP3260")) {
+	if (!strcmp(np->vpd.model, NIU_ALONSO_MDL_STR) ||
+	    !strcmp(np->vpd.model, NIU_KIMI_MDL_STR)) {
 		np->flags |= NIU_FLAGS_10G;
 		np->flags &= ~NIU_FLAGS_FIBER;
 		np->flags |= NIU_FLAGS_XCVR_SERDES;
@@ -7065,7 +7048,7 @@ static void __devinit niu_pci_vpd_validate(struct niu *np)
 		}
 		if (np->flags & NIU_FLAGS_10G)
 			 np->mac_xcvr = MAC_XCVR_XPCS;
-	} else if (niu_board_model_match(np, NIU_FOXXY_BM_STR)) {
+	} else if (!strcmp(np->vpd.model, NIU_FOXXY_MDL_STR)) {
 		np->flags |= (NIU_FLAGS_10G | NIU_FLAGS_FIBER |
 			      NIU_FLAGS_HOTPLUG_PHY);
 	} else if (niu_phy_type_prop_decode(np, np->vpd.phy_type)) {
@@ -7541,8 +7524,8 @@ static int __devinit walk_phys(struct niu *np, struct niu_parent *parent)
 	u32 val;
 	int err;
 
-	if (!strcmp(np->vpd.model, "SUNW,CP3220") ||
-	    !strcmp(np->vpd.model, "SUNW,CP3260")) {
+	if (!strcmp(np->vpd.model, NIU_ALONSO_MDL_STR) ||
+	    !strcmp(np->vpd.model, NIU_KIMI_MDL_STR)) {
 		num_10g = 0;
 		num_1g = 2;
 		parent->plat_type = PLAT_TYPE_ATCA_CP3220;
@@ -7551,7 +7534,7 @@ static int __devinit walk_phys(struct niu *np, struct niu_parent *parent)
 		       phy_encode(PORT_TYPE_1G, 1) |
 		       phy_encode(PORT_TYPE_1G, 2) |
 		       phy_encode(PORT_TYPE_1G, 3));
-	} else if (niu_board_model_match(np, NIU_FOXXY_BM_STR)) {
+	} else if (!strcmp(np->vpd.model, NIU_FOXXY_MDL_STR)) {
 		num_10g = 2;
 		num_1g = 0;
 		parent->num_ports = 2;
@@ -7946,6 +7929,7 @@ static int __devinit niu_get_of_props(struct niu *np)
 	struct device_node *dp;
 	const char *phy_type;
 	const u8 *mac_addr;
+	const char *model;
 	int prop_len;
 
 	if (np->parent->plat_type == PLAT_TYPE_NIU)
@@ -8000,6 +7984,11 @@ static int __devinit niu_get_of_props(struct niu *np)
 
 	memcpy(dev->dev_addr, dev->perm_addr, dev->addr_len);
 
+	model = of_get_property(dp, "model", &prop_len);
+
+	if (model)
+		strcpy(np->vpd.model, model);
+
 	return 0;
 #else
 	return -EINVAL;
diff --git a/drivers/net/niu.h b/drivers/net/niu.h
index 97ffbe1..12fd570 100644
--- a/drivers/net/niu.h
+++ b/drivers/net/niu.h
@@ -2946,6 +2946,15 @@ struct rx_ring_info {
 #define	NIU_ALONSO_BM_STR	"373-0202"
 #define	NIU_FOXXY_BM_STR	"501-7961"
 #define	NIU_2XGF_MRVL_BM_STR	"SK-6E82"
+#define	NIU_QGC_LP_MDL_STR	"SUNW,pcie-qgc"
+#define	NIU_2XGF_LP_MDL_STR	"SUNW,pcie-2xgf"
+#define	NIU_QGC_PEM_MDL_STR	"SUNW,pcie-qgc-pem"
+#define	NIU_2XGF_PEM_MDL_STR	"SUNW,pcie-2xgf-pem"
+#define	NIU_ALONSO_MDL_STR	"SUNW,CP3220"
+#define	NIU_KIMI_MDL_STR	"SUNW,CP3260"
+#define	NIU_MARAMBA_MDL_STR	"SUNW,pcie-neptune"
+#define	NIU_FOXXY_MDL_STR	"SUNW,pcie-rfem"
+#define	NIU_2XGF_MRVL_MDL_STR	"SysKonnect,pcie-2xgf"
 
 #define NIU_VPD_MIN_MAJOR	3
 #define NIU_VPD_MIN_MINOR	4
-- 
cgit v0.10.2


From 6e40a915de82e00d18f75941e531b40c4e0d94c4 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Fri, 9 May 2008 15:11:17 -0700
Subject: sctp: Do not enable peer IPv6 address support on PF_INET socket

If a socket is created with the PF_INET family, it cannot use IPv6 addresses
to send/receive DATA, so we cannot use IPv6 addresses even if the peer tells
us it supports them.
This patch only enables peer IPv6 address support on PF_INET6 sockets.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 81b6064..69a464f 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2418,7 +2418,8 @@ static int sctp_process_param(struct sctp_association *asoc,
 				break;
 
 			case SCTP_PARAM_IPV6_ADDRESS:
-				asoc->peer.ipv6_address = 1;
+				if (PF_INET6 == asoc->base.sk->sk_family)
+					asoc->peer.ipv6_address = 1;
 				break;
 
 			case SCTP_PARAM_HOST_NAME_ADDRESS:
-- 
cgit v0.10.2


From c4492586a618d18e8a5343a04bad0ec606064846 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Fri, 9 May 2008 15:11:53 -0700
Subject: sctp: Add address type check while process paramaters of ASCONF chunk

If a socket is created with the AF_INET family, adding an IPv6 address to the
asoc will cause a kernel panic when a packet is transmitted on that transport.

This patch adds an address type check before processing the parameters of an
ASCONF chunk. If the peer does not support this address type, return with the
error invalid parameter.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 69a464f..6eeee53 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2827,6 +2827,19 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
 	union sctp_addr	addr;
 	union sctp_addr_param *addr_param;
 
+	switch (addr_param->v4.param_hdr.type) {
+	case SCTP_PARAM_IPV6_ADDRESS:
+		if (!asoc->peer.ipv6_address)
+			return SCTP_ERROR_INV_PARAM;
+		break;
+	case SCTP_PARAM_IPV4_ADDRESS:
+		if (!asoc->peer.ipv4_address)
+			return SCTP_ERROR_INV_PARAM;
+		break;
+	default:
+		return SCTP_ERROR_INV_PARAM;
+	}
+
 	addr_param = (union sctp_addr_param *)
 			((void *)asconf_param + sizeof(sctp_addip_param_t));
 
-- 
cgit v0.10.2


From 4951704b4e23d71b99ac933d8e6993bc6225ac13 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 12 May 2008 03:29:11 -0700
Subject: syncppp: Fix crashes.

The syncppp layer wants a mid-level netdev private pointer.

It was using netdev->priv but that only worked by accident,
and thus this scheme was broken when the device private
allocation strategy changed.

Add a proper mid-layer private pointer for uses like this,
update syncppp and all users, and remove the HDLC_PPP broken
tag from drivers/net/wan/Kconfig

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig
index 8005dd1..d5140ae 100644
--- a/drivers/net/wan/Kconfig
+++ b/drivers/net/wan/Kconfig
@@ -150,11 +150,9 @@ config HDLC_FR
 
 config HDLC_PPP
 	tristate "Synchronous Point-to-Point Protocol (PPP) support"
-	depends on HDLC && BROKEN
+	depends on HDLC
 	help
 	  Generic HDLC driver supporting PPP over WAN connections.
-	  This module is currently broken and will cause a kernel panic
-	  when a device configured in PPP mode is activated.
 
 	  It will be replaced by new PPP implementation in Linux 2.6.26.
 
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 45ddfc9..b0fce13 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -629,7 +629,7 @@ static void sppp_channel_init(struct channel_data *chan)
 	d->base_addr = chan->cosa->datareg;
 	d->irq = chan->cosa->irq;
 	d->dma = chan->cosa->dma;
-	d->priv = chan;
+	d->ml_priv = chan;
 	sppp_attach(&chan->pppdev);
 	if (register_netdev(d)) {
 		printk(KERN_WARNING "%s: register_netdev failed.\n", d->name);
@@ -650,7 +650,7 @@ static void sppp_channel_delete(struct channel_data *chan)
 
 static int cosa_sppp_open(struct net_device *d)
 {
-	struct channel_data *chan = d->priv;
+	struct channel_data *chan = d->ml_priv;
 	int err;
 	unsigned long flags;
 
@@ -690,7 +690,7 @@ static int cosa_sppp_open(struct net_device *d)
 
 static int cosa_sppp_tx(struct sk_buff *skb, struct net_device *dev)
 {
-	struct channel_data *chan = dev->priv;
+	struct channel_data *chan = dev->ml_priv;
 
 	netif_stop_queue(dev);
 
@@ -701,7 +701,7 @@ static int cosa_sppp_tx(struct sk_buff *skb, struct net_device *dev)
 
 static void cosa_sppp_timeout(struct net_device *dev)
 {
-	struct channel_data *chan = dev->priv;
+	struct channel_data *chan = dev->ml_priv;
 
 	if (test_bit(RXBIT, &chan->cosa->rxtx)) {
 		chan->stats.rx_errors++;
@@ -720,7 +720,7 @@ static void cosa_sppp_timeout(struct net_device *dev)
 
 static int cosa_sppp_close(struct net_device *d)
 {
-	struct channel_data *chan = d->priv;
+	struct channel_data *chan = d->ml_priv;
 	unsigned long flags;
 
 	netif_stop_queue(d);
@@ -800,7 +800,7 @@ static int sppp_tx_done(struct channel_data *chan, int size)
 
 static struct net_device_stats *cosa_net_stats(struct net_device *dev)
 {
-	struct channel_data *chan = dev->priv;
+	struct channel_data *chan = dev->ml_priv;
 	return &chan->stats;
 }
 
@@ -1217,7 +1217,7 @@ static int cosa_sppp_ioctl(struct net_device *dev, struct ifreq *ifr,
 	int cmd)
 {
 	int rv;
-	struct channel_data *chan = dev->priv;
+	struct channel_data *chan = dev->ml_priv;
 	rv = cosa_ioctl_common(chan->cosa, chan, cmd, (unsigned long)ifr->ifr_data);
 	if (rv == -ENOIOCTLCMD) {
 		return sppp_do_ioctl(dev, ifr, cmd);
diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index 10396d9..0030833 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -45,7 +45,7 @@ static int ppp_open(struct net_device *dev)
 	int (*old_ioctl)(struct net_device *, struct ifreq *, int);
 	int result;
 
-	dev->priv = &state(hdlc)->syncppp_ptr;
+	dev->ml_priv = &state(hdlc)->syncppp_ptr;
 	state(hdlc)->syncppp_ptr = &state(hdlc)->pppdev;
 	state(hdlc)->pppdev.dev = dev;
 
diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index 83dbc92..f3065d3 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -75,7 +75,7 @@ static void hostess_input(struct z8530_channel *c, struct sk_buff *skb)
  
 static int hostess_open(struct net_device *d)
 {
-	struct sv11_device *sv11=d->priv;
+	struct sv11_device *sv11=d->ml_priv;
 	int err = -1;
 	
 	/*
@@ -128,7 +128,7 @@ static int hostess_open(struct net_device *d)
 
 static int hostess_close(struct net_device *d)
 {
-	struct sv11_device *sv11=d->priv;
+	struct sv11_device *sv11=d->ml_priv;
 	/*
 	 *	Discard new frames
 	 */
@@ -159,14 +159,14 @@ static int hostess_close(struct net_device *d)
 
 static int hostess_ioctl(struct net_device *d, struct ifreq *ifr, int cmd)
 {
-	/* struct sv11_device *sv11=d->priv;
+	/* struct sv11_device *sv11=d->ml_priv;
 	   z8530_ioctl(d,&sv11->sync.chanA,ifr,cmd) */
 	return sppp_do_ioctl(d, ifr,cmd);
 }
 
 static struct net_device_stats *hostess_get_stats(struct net_device *d)
 {
-	struct sv11_device *sv11=d->priv;
+	struct sv11_device *sv11=d->ml_priv;
 	if(sv11)
 		return z8530_get_stats(&sv11->sync.chanA);
 	else
@@ -179,7 +179,7 @@ static struct net_device_stats *hostess_get_stats(struct net_device *d)
  
 static int hostess_queue_xmit(struct sk_buff *skb, struct net_device *d)
 {
-	struct sv11_device *sv11=d->priv;
+	struct sv11_device *sv11=d->ml_priv;
 	return z8530_queue_xmit(&sv11->sync.chanA, skb);
 }
 
@@ -325,6 +325,7 @@ static struct sv11_device *sv11_init(int iobase, int irq)
 		/* 
 		 *	Initialise the PPP components
 		 */
+		d->ml_priv = sv;
 		sppp_attach(&sv->netdev);
 		
 		/*
@@ -333,7 +334,6 @@ static struct sv11_device *sv11_init(int iobase, int irq)
 		
 		d->base_addr = iobase;
 		d->irq = irq;
-		d->priv = sv;
 		
 		if(register_netdev(d))
 		{
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 6635ece..62133ce 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -891,6 +891,7 @@ static int __devinit lmc_init_one(struct pci_dev *pdev,
 
     /* Initialize the sppp layer */
     /* An ioctl can cause a subsequent detach for raw frame interface */
+    dev->ml_priv = sc;
     sc->if_type = LMC_PPP;
     sc->check = 0xBEAFCAFE;
     dev->base_addr = pci_resource_start(pdev, 0);
diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index 11276bf..44a89df 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -241,6 +241,7 @@ static inline struct slvl_device *slvl_alloc(int iobase, int irq)
 		return NULL;
 
 	sv = d->priv;
+	d->ml_priv = sv;
 	sv->if_ptr = &sv->pppdev;
 	sv->pppdev.dev = d;
 	d->base_addr = iobase;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7c1d446..7469017 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -715,6 +715,9 @@ struct net_device
 	struct net		*nd_net;
 #endif
 
+	/* mid-layer private */
+	void			*ml_priv;
+
 	/* bridge stuff */
 	struct net_bridge_port	*br_port;
 	/* macvlan */
diff --git a/include/net/syncppp.h b/include/net/syncppp.h
index 877efa4..e43f407 100644
--- a/include/net/syncppp.h
+++ b/include/net/syncppp.h
@@ -59,7 +59,7 @@ struct ppp_device
 
 static inline struct sppp *sppp_of(struct net_device *dev) 
 {
-	struct ppp_device **ppp = dev->priv;
+	struct ppp_device **ppp = dev->ml_priv;
 	BUG_ON((*ppp)->dev != dev);
 	return &(*ppp)->sppp;
 }
-- 
cgit v0.10.2


From 5dc474d6b3ba19df7d491d4eabd9fb7a0c1c2423 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Mon, 12 May 2008 15:43:46 -0700
Subject: pppol2tp: Remove null pointer dereference.

If session is NULL, it is not possible to access its name field.  So I
have split apart the printing of the error message to drop the
printing of the name field in this case.

The macro PRINTK actually only evaluates its arguments starting with
the third one if the bitwise conjunction of the first two is non-zero.
Normally, this conjunction would only be non-zero if debugging mode
were turned on, but when session is NULL, the first argument in both
the old and new code is -1, and thus the bitwise conjunction is true.
Perhaps a different strategy is desired, such as using tunnel->debug,
which session->debug is initialized to, but tunnel can also be NULL,
so this does not completely solve the problem.


This problem was found using the following semantic match
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@@
expression E, E1;
identifier f;
statement S1,S2,S3;
@@

* if (E == NULL)
{
  ... when != if (E == NULL) S1 else S2
      when != E = E1
* E->f
  ... when any
  return ...;
}
else S3
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c
index 244d783..79359919 100644
--- a/drivers/net/pppol2tp.c
+++ b/drivers/net/pppol2tp.c
@@ -1621,9 +1621,16 @@ out_no_ppp:
 end:
 	release_sock(sk);
 
-	if (error != 0)
-		PRINTK(session ? session->debug : -1, PPPOL2TP_MSG_CONTROL, KERN_WARNING,
-		       "%s: connect failed: %d\n", session->name, error);
+	if (error != 0) {
+		if (session)
+			PRINTK(session->debug,
+				PPPOL2TP_MSG_CONTROL, KERN_WARNING,
+				"%s: connect failed: %d\n",
+				session->name, error);
+		else
+			PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_WARNING,
+				"connect failed: %d\n", error);
+	}
 
 	return error;
 }
-- 
cgit v0.10.2


From 85b442e378ac3413e269a70a0031727ef121bd2a Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Tue, 22 Apr 2008 14:03:32 -0400
Subject: prism54: fix regression with missing carrier in AP-mode

This fixes a regression introduced by commit 7b463ced6 (prism54: set
carrier flags correctly) which causes the device to come up without
a carrier in AP-mode.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/drivers/net/wireless/prism54/islpci_dev.c b/drivers/net/wireless/prism54/islpci_dev.c
index 04c2638..9196825 100644
--- a/drivers/net/wireless/prism54/islpci_dev.c
+++ b/drivers/net/wireless/prism54/islpci_dev.c
@@ -388,8 +388,15 @@ islpci_open(struct net_device *ndev)
 
 	netif_start_queue(ndev);
 
-	/* Turn off carrier unless we know we have associated */
-	netif_carrier_off(ndev);
+	/* Turn off carrier if in STA or Ad-hoc mode. It will be turned on
+	 * once the firmware receives a trap of being associated
+	 * (GEN_OID_LINKSTATE). In other modes (AP or WDS or monitor) we
+	 * should just leave the carrier on as its expected the firmware
+	 * won't send us a trigger. */
+	if (priv->iw_mode == IW_MODE_INFRA || priv->iw_mode == IW_MODE_ADHOC)
+		netif_carrier_off(ndev);
+	else
+		netif_carrier_on(ndev);
 
 	return 0;
 }
-- 
cgit v0.10.2


From bb55bdd512905f35f9d7dfe65d1f16014e1f9b2f Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 1 May 2008 15:58:17 -0700
Subject: fix irq flags in mac80211 code

A file in the net/mac80211 directory uses "int" for flags.  This can cause
hard to find bugs on some architectures.  This patch converts the flags to use
"long" instead.

This bug was discovered by doing an allyesconfig make on the -rt kernel where
checks are done to ensure all flags are of size sizeof(long).

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c
index ae75d41..ff5c380f 100644
--- a/net/mac80211/rc80211_pid_debugfs.c
+++ b/net/mac80211/rc80211_pid_debugfs.c
@@ -85,7 +85,7 @@ static int rate_control_pid_events_open(struct inode *inode, struct file *file)
 	struct rc_pid_sta_info *sinfo = inode->i_private;
 	struct rc_pid_event_buffer *events = &sinfo->events;
 	struct rc_pid_events_file_info *file_info;
-	unsigned int status;
+	unsigned long status;
 
 	/* Allocate a state struct */
 	file_info = kmalloc(sizeof(*file_info), GFP_KERNEL);
@@ -135,7 +135,7 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf,
 	char pb[RC_PID_PRINT_BUF_SIZE];
 	int ret;
 	int p;
-	unsigned int status;
+	unsigned long status;
 
 	/* Check if there is something to read. */
 	if (events->next_entry == file_info->next_entry) {
-- 
cgit v0.10.2


From d5251aea1539ec89dd567e75169c568b5243b6fa Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Fri, 2 May 2008 09:56:34 -0400
Subject: wavelan: avoid index past end of array if DEBUG_SHOW_UNUSED is
 defined
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported by Daniel Marjamäki <danielm77@spray.se> here:

	http://bugzilla.kernel.org/show_bug.cgi?id=10588

Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/drivers/net/wireless/wavelan.c b/drivers/net/wireless/wavelan.c
index 03384a4..49ae970 100644
--- a/drivers/net/wireless/wavelan.c
+++ b/drivers/net/wireless/wavelan.c
@@ -908,9 +908,9 @@ static void wv_psa_show(psa_t * p)
 	     p->psa_call_code[3], p->psa_call_code[4], p->psa_call_code[5],
 	     p->psa_call_code[6], p->psa_call_code[7]);
 #ifdef DEBUG_SHOW_UNUSED
-	printk(KERN_DEBUG "psa_reserved[]: %02X:%02X:%02X:%02X\n",
+	printk(KERN_DEBUG "psa_reserved[]: %02X:%02X\n",
 	       p->psa_reserved[0],
-	       p->psa_reserved[1], p->psa_reserved[2], p->psa_reserved[3]);
+	       p->psa_reserved[1]);
 #endif				/* DEBUG_SHOW_UNUSED */
 	printk(KERN_DEBUG "psa_conf_status: %d, ", p->psa_conf_status);
 	printk("psa_crc: 0x%02x%02x, ", p->psa_crc[0], p->psa_crc[1]);
diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c
index baf7401..b584c0e 100644
--- a/drivers/net/wireless/wavelan_cs.c
+++ b/drivers/net/wireless/wavelan_cs.c
@@ -1074,11 +1074,9 @@ wv_psa_show(psa_t *	p)
 	 p->psa_call_code[6],
 	 p->psa_call_code[7]);
 #ifdef DEBUG_SHOW_UNUSED
-  printk(KERN_DEBUG "psa_reserved[]: %02X:%02X:%02X:%02X\n",
+  printk(KERN_DEBUG "psa_reserved[]: %02X:%02X\n",
 	 p->psa_reserved[0],
-	 p->psa_reserved[1],
-	 p->psa_reserved[2],
-	 p->psa_reserved[3]);
+	 p->psa_reserved[1]);
 #endif	/* DEBUG_SHOW_UNUSED */
   printk(KERN_DEBUG "psa_conf_status: %d, ", p->psa_conf_status);
   printk("psa_crc: 0x%02x%02x, ", p->psa_crc[0], p->psa_crc[1]);
-- 
cgit v0.10.2


From 78520cad4b222d81fa18f2dcfa52394d8d1722b0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 3 May 2008 01:04:47 +0200
Subject: mac80211: fix debugfs default key oops

Under certain circumstances (in AP mode) the debugfs function
that is supposed to add the default key symlink can encounter
a NULL default_key pointer. This patch makes it handle that
situation gracefully.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 879e721..19efc3a 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -255,14 +255,23 @@ void ieee80211_debugfs_key_remove(struct ieee80211_key *key)
 void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata)
 {
 	char buf[50];
+	struct ieee80211_key *key;
 
 	if (!sdata->debugfsdir)
 		return;
 
-	sprintf(buf, "../keys/%d", sdata->default_key->debugfs.cnt);
-	sdata->debugfs.default_key =
-		debugfs_create_symlink("default_key", sdata->debugfsdir, buf);
+	/* this is running under the key lock */
+
+	key = sdata->default_key;
+	if (key) {
+		sprintf(buf, "../keys/%d", key->debugfs.cnt);
+		sdata->debugfs.default_key =
+			debugfs_create_symlink("default_key",
+					       sdata->debugfsdir, buf);
+	} else
+		ieee80211_debugfs_key_remove_default(sdata);
 }
+
 void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata)
 {
 	if (!sdata)
-- 
cgit v0.10.2


From 6243065d308ab566aa318a8adef853bc0418896d Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Mon, 5 May 2008 10:22:46 +0800
Subject: iwlwifi: fix compile error when CONFIG_MAC80211_DEBUGFS is not
 selected

Make iwl4965_lq_sta->drv available even without CONFIG_MAC80211_DEBUGFS.

Signed-off-by: Yi Zhu <yi.zhu@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/drivers/net/wireless/iwlwifi/iwl-4965-rs.c b/drivers/net/wireless/iwlwifi/iwl-4965-rs.c
index b608e1c..c9847b1 100644
--- a/drivers/net/wireless/iwlwifi/iwl-4965-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-4965-rs.c
@@ -163,8 +163,8 @@ struct iwl4965_lq_sta {
 	struct dentry *rs_sta_dbgfs_tx_agg_tid_en_file;
 #endif
 	struct iwl4965_rate dbg_fixed;
-	struct iwl_priv *drv;
 #endif
+	struct iwl_priv *drv;
 };
 
 static void rs_rate_scale_perform(struct iwl_priv *priv,
-- 
cgit v0.10.2


From 78720897459a0ed3843c80e9bd9ef1b2f7ae5c8f Mon Sep 17 00:00:00 2001
From: Ivo van Doorn <ivdoorn@gmail.com>
Date: Mon, 5 May 2008 17:23:31 +0200
Subject: rt2x00: Don't use pskb_expand_head()

rt2x00pci allocates DMA for descriptor and data,
rt61pci doesn't use this for the beacon, but it can
use the descriptor part as temporary buffer instead
of using pskb_expand_head().
Using this temporary buffer is obviously much better
than reallocating the skb buffer...

At the same time we can set the data length for the
beacon queue at 0, to make sure no DMA is allocated for
data (but just for the descriptor).

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c
index ae12dcd..14bc7b2 100644
--- a/drivers/net/wireless/rt2x00/rt61pci.c
+++ b/drivers/net/wireless/rt2x00/rt61pci.c
@@ -2366,6 +2366,7 @@ static int rt61pci_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
 {
 	struct rt2x00_dev *rt2x00dev = hw->priv;
 	struct rt2x00_intf *intf = vif_to_intf(control->vif);
+	struct queue_entry_priv_pci_tx *priv_tx;
 	struct skb_frame_desc *skbdesc;
 	unsigned int beacon_base;
 	u32 reg;
@@ -2373,21 +2374,8 @@ static int rt61pci_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
 	if (unlikely(!intf->beacon))
 		return -ENOBUFS;
 
-	/*
-	 * We need to append the descriptor in front of the
-	 * beacon frame.
-	 */
-	if (skb_headroom(skb) < intf->beacon->queue->desc_size) {
-		if (pskb_expand_head(skb, intf->beacon->queue->desc_size,
-				     0, GFP_ATOMIC))
-			return -ENOMEM;
-	}
-
-	/*
-	 * Add the descriptor in front of the skb.
-	 */
-	skb_push(skb, intf->beacon->queue->desc_size);
-	memset(skb->data, 0, intf->beacon->queue->desc_size);
+	priv_tx = intf->beacon->priv_data;
+	memset(priv_tx->desc, 0, intf->beacon->queue->desc_size);
 
 	/*
 	 * Fill in skb descriptor
@@ -2395,9 +2383,9 @@ static int rt61pci_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
 	skbdesc = get_skb_frame_desc(skb);
 	memset(skbdesc, 0, sizeof(*skbdesc));
 	skbdesc->flags |= FRAME_DESC_DRIVER_GENERATED;
-	skbdesc->data = skb->data + intf->beacon->queue->desc_size;
-	skbdesc->data_len = skb->len - intf->beacon->queue->desc_size;
-	skbdesc->desc = skb->data;
+	skbdesc->data = skb->data;
+	skbdesc->data_len = skb->len;
+	skbdesc->desc = priv_tx->desc;
 	skbdesc->desc_len = intf->beacon->queue->desc_size;
 	skbdesc->entry = intf->beacon;
 
@@ -2425,7 +2413,10 @@ static int rt61pci_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb,
 	 */
 	beacon_base = HW_BEACON_OFFSET(intf->beacon->entry_idx);
 	rt2x00pci_register_multiwrite(rt2x00dev, beacon_base,
-				      skb->data, skb->len);
+				      skbdesc->desc, skbdesc->desc_len);
+	rt2x00pci_register_multiwrite(rt2x00dev,
+				      beacon_base + skbdesc->desc_len,
+				      skbdesc->data, skbdesc->data_len);
 	rt61pci_kick_tx_queue(rt2x00dev, control->queue);
 
 	return 0;
@@ -2490,7 +2481,7 @@ static const struct data_queue_desc rt61pci_queue_tx = {
 
 static const struct data_queue_desc rt61pci_queue_bcn = {
 	.entry_num		= 4 * BEACON_ENTRIES,
-	.data_size		= MGMT_FRAME_SIZE,
+	.data_size		= 0, /* No DMA required for beacons */
 	.desc_size		= TXINFO_SIZE,
 	.priv_size		= sizeof(struct queue_entry_priv_pci_tx),
 };
-- 
cgit v0.10.2


From ed499983b88d138848ec9e4d104fd86a5ef0c183 Mon Sep 17 00:00:00 2001
From: Ivo van Doorn <ivdoorn@gmail.com>
Date: Mon, 5 May 2008 17:23:47 +0200
Subject: rt2x00: Fix broken recover-on-error path

During initialization the initialize() callback function
in rt2x00pci and rt2x00usb will clean up the mess they made.

rt2x00lib shouldn't call uninitialize because the callback function already
cleaned up _and_ the DEVICE_INITIALIZED isn't set which causes the
rt2x00lib_uninitialize() to halt directly anyway. All that is required
to be cleaned up by rt2x00lib is the queue, and that can be done by
calling rt2x00queue_uninitialize() directly.

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 8d8657f..b22c027 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -1032,8 +1032,10 @@ static int rt2x00lib_initialize(struct rt2x00_dev *rt2x00dev)
 	 * Initialize the device.
 	 */
 	status = rt2x00dev->ops->lib->initialize(rt2x00dev);
-	if (status)
-		goto exit;
+	if (status) {
+		rt2x00queue_uninitialize(rt2x00dev);
+		return status;
+	}
 
 	__set_bit(DEVICE_INITIALIZED, &rt2x00dev->flags);
 
@@ -1043,11 +1045,6 @@ static int rt2x00lib_initialize(struct rt2x00_dev *rt2x00dev)
 	rt2x00rfkill_register(rt2x00dev);
 
 	return 0;
-
-exit:
-	rt2x00lib_uninitialize(rt2x00dev);
-
-	return status;
 }
 
 int rt2x00lib_start(struct rt2x00_dev *rt2x00dev)
-- 
cgit v0.10.2


From b30cdfc517b06f5d3f7a5e90626931140b2caece Mon Sep 17 00:00:00 2001
From: Ivo van Doorn <ivdoorn@gmail.com>
Date: Mon, 5 May 2008 17:24:03 +0200
Subject: rt2x00: Clean up error handling of PCI queue DMA allocation.

When, for some reason, the rt2x00pci module fails to allocate DMA memory for
the queues, it tries to undo the complete initialization of the PCI device,
including freeing of the irq. This results in the following error in dmesg, as
the irq hadn't been requested yet:

[  78.123456] Trying to free already-free IRQ 17

Fix this by implementing proper error handling code, instead of just using the
full uninitialization function.

Signed-off-by: Gertjan van Wingerde <gwingerde@kpnplanet.nl>
Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/drivers/net/wireless/rt2x00/rt2x00pci.c b/drivers/net/wireless/rt2x00/rt2x00pci.c
index 7867ec6..971af25 100644
--- a/drivers/net/wireless/rt2x00/rt2x00pci.c
+++ b/drivers/net/wireless/rt2x00/rt2x00pci.c
@@ -314,13 +314,14 @@ int rt2x00pci_initialize(struct rt2x00_dev *rt2x00dev)
 	if (status) {
 		ERROR(rt2x00dev, "IRQ %d allocation failed (error %d).\n",
 		      pci_dev->irq, status);
-		return status;
+		goto exit;
 	}
 
 	return 0;
 
 exit:
-	rt2x00pci_uninitialize(rt2x00dev);
+	queue_for_each(rt2x00dev, queue)
+		rt2x00pci_free_queue_dma(rt2x00dev, queue);
 
 	return status;
 }
-- 
cgit v0.10.2


From df44205455773852a6af10a7c6ed768fe8a86b31 Mon Sep 17 00:00:00 2001
From: Ivo van Doorn <ivdoorn@gmail.com>
Date: Mon, 5 May 2008 20:40:35 +0200
Subject: mac80211: Don't encrypt beacons

mac80211 should set the IEEE80211_TXCTL_DO_NOT_ENCRYPT flag in tx_control
structure to inform drivers not to encrypt the beacon. Drivers that only check
for that flag before accessing the hw_key field, will otherwise cause a NULL
pointer dereference since that field is not configured for beacons.

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index f35eaea9..28d8bd5 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1898,6 +1898,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 			control->flags |= IEEE80211_TXCTL_SHORT_PREAMBLE;
 		control->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
 		control->flags |= IEEE80211_TXCTL_NO_ACK;
+		control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
 		control->retry_limit = 1;
 		control->flags |= IEEE80211_TXCTL_CLEAR_PS_FILT;
 	}
-- 
cgit v0.10.2


From ef269254772a0d2253c85cafe160e3f6528eb292 Mon Sep 17 00:00:00 2001
From: Luis Carlos Cobo <luisca@cozybit.com>
Date: Mon, 5 May 2008 12:02:35 -0700
Subject: mac80211: fix incorrect mesh header length

This should have been updated at the same time we were transitioning from 3 byte
to 4 byte mesh sequence number. Pointed out by Johannes Berg.

Signed-off-by: Luis Carlos Cobo <luisca@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index f76bc26..697ef67 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -397,7 +397,7 @@ int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr,
 	put_unaligned(cpu_to_le32(sdata->u.sta.mesh_seqnum), &meshhdr->seqnum);
 	sdata->u.sta.mesh_seqnum++;
 
-	return 5;
+	return 6;
 }
 
 void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index cc9f715..24a465c 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -153,15 +153,15 @@ int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
 	/* 7.1.3.5a.2 */
 	switch (ae) {
 	case 0:
-		return 5;
+		return 6;
 	case 1:
-		return 11;
+		return 12;
 	case 2:
-		return 17;
+		return 18;
 	case 3:
-		return 23;
+		return 24;
 	default:
-		return 5;
+		return 6;
 	}
 }
 
-- 
cgit v0.10.2


From 69687a0b9934942e61bf8148c242adea87183a5b Mon Sep 17 00:00:00 2001
From: Luis Carlos Cobo <luisca@cozybit.com>
Date: Mon, 5 May 2008 12:29:42 -0700
Subject: mac80211: fix access to null skb

Without this patch, if xmit_skb is null but net_ratelimit() returns 0 we would
go to the else branch and access the null xmit_skb. Pointed out by Johannes
Berg.

Signed-off-by: Luis Carlos Cobo <luisca@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 02f436a..9c57b3a 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1305,11 +1305,11 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
 		if (is_multicast_ether_addr(skb->data)) {
 			if (*mesh_ttl > 0) {
 				xmit_skb = skb_copy(skb, GFP_ATOMIC);
-				if (!xmit_skb && net_ratelimit())
+				if (xmit_skb)
+					xmit_skb->pkt_type = PACKET_OTHERHOST;
+				else if (net_ratelimit())
 					printk(KERN_DEBUG "%s: failed to clone "
 					       "multicast frame\n", dev->name);
-				else
-					xmit_skb->pkt_type = PACKET_OTHERHOST;
 			} else
 				IEEE80211_IFSTA_MESH_CTR_INC(&sdata->u.sta,
 							     dropped_frames_ttl);
-- 
cgit v0.10.2


From 812714d741750038004da505074c9158e9dee270 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 6 May 2008 12:52:07 +0200
Subject: mac80211: mesh hwmp: fix kfree(skb)

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 3df8092..af0cd1e 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -120,7 +120,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 		*pos++ = WLAN_EID_PREP;
 		break;
 	default:
-		kfree(skb);
+		kfree_skb(skb);
 		return -ENOTSUPP;
 		break;
 	}
-- 
cgit v0.10.2


From f84e71a94cb5f88d86ab50c251e09379925b80b9 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 6 May 2008 18:46:36 +0400
Subject: Fix GFP_KERNEL allocation under read lock.

The mesh_path_add() read-locks the pathtbl_resize_lock and calls
kmalloc with GFP_KERNEL mask.

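Fix it by allocating before taking the lock, and move the endadd2 label
lower. Strictly it should go _before_ the if() that follows the unlock, but
it makes no sense for it to be there (err is always set on that path, so the
if() body is never entered), so I move it right after this if().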

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 5845dc2..727aa52 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -158,14 +158,14 @@ int mesh_path_add(u8 *dst, struct net_device *dev)
 	if (atomic_add_unless(&sdata->u.sta.mpaths, 1, MESH_MAX_MPATHS) == 0)
 		return -ENOSPC;
 
-	read_lock(&pathtbl_resize_lock);
-
 	new_mpath = kzalloc(sizeof(struct mesh_path), GFP_KERNEL);
 	if (!new_mpath) {
 		atomic_dec(&sdata->u.sta.mpaths);
 		err = -ENOMEM;
 		goto endadd2;
 	}
+
+	read_lock(&pathtbl_resize_lock);
 	memcpy(new_mpath->dst, dst, ETH_ALEN);
 	new_mpath->dev = dev;
 	new_mpath->flags = 0;
@@ -202,7 +202,6 @@ int mesh_path_add(u8 *dst, struct net_device *dev)
 
 endadd:
 	spin_unlock(&mesh_paths->hashwlock[hash_idx]);
-endadd2:
 	read_unlock(&pathtbl_resize_lock);
 	if (!err && grow) {
 		struct mesh_table *oldtbl, *newtbl;
@@ -219,6 +218,7 @@ endadd2:
 		mesh_table_free(oldtbl, false);
 		write_unlock(&pathtbl_resize_lock);
 	}
+endadd2:
 	return err;
 }
 
-- 
cgit v0.10.2


From 0eb03d5a14377eecf6ed0ebf3cc2c9f48c12c7c6 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 6 May 2008 18:49:02 +0400
Subject: Fix not checked kmalloc() result.

The result of the new_node kmalloc() is not checked for success, so add
this check.

BTW, the allocation also happens under the read_lock, so move it before the
lock is taken.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 727aa52..1d2d051 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -164,13 +164,19 @@ int mesh_path_add(u8 *dst, struct net_device *dev)
 		err = -ENOMEM;
 		goto endadd2;
 	}
+	new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL);
+	if (!new_node) {
+		kfree(new_mpath);
+		atomic_dec(&sdata->u.sta.mpaths);
+		err = -ENOMEM;
+		goto endadd2;
+	}
 
 	read_lock(&pathtbl_resize_lock);
 	memcpy(new_mpath->dst, dst, ETH_ALEN);
 	new_mpath->dev = dev;
 	new_mpath->flags = 0;
 	skb_queue_head_init(&new_mpath->frame_queue);
-	new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL);
 	new_node->mpath = new_mpath;
 	new_mpath->timer.data = (unsigned long) new_mpath;
 	new_mpath->timer.function = mesh_path_timer;
-- 
cgit v0.10.2


From 6d6936e2ea82ebcbdd12d489b7b5ccf430de52f1 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 6 May 2008 18:51:31 +0400
Subject: Fix potential scheduling while atomic in mesh_path_add.

Calling synchronize_rcu() with pathtbl_resize_lock write-locked may
result in a "scheduling while atomic" warning (and other side effects).

It looks safe to just drop this lock before calling synchronize_rcu().

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 1d2d051..99c2d36 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -220,9 +220,10 @@ endadd:
 			return -ENOMEM;
 		}
 		rcu_assign_pointer(mesh_paths, newtbl);
+		write_unlock(&pathtbl_resize_lock);
+
 		synchronize_rcu();
 		mesh_table_free(oldtbl, false);
-		write_unlock(&pathtbl_resize_lock);
 	}
 endadd2:
 	return err;
-- 
cgit v0.10.2


From dbabad0c9c026dea3ba803cbd9d768cdffc68e32 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 8 May 2008 01:43:59 +0200
Subject: zd1211rw: fix potential use-after-free bug

zd_mac_tx_to_dev() could potentially free the skb, or hand it off
to mac80211 which might free it. Hence, this code needs to get the
usb pointer out of skb->cb before handing it off to that function.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c
index 5316074..12e24f0 100644
--- a/drivers/net/wireless/zd1211rw/zd_usb.c
+++ b/drivers/net/wireless/zd1211rw/zd_usb.c
@@ -889,9 +889,13 @@ static void tx_urb_complete(struct urb *urb)
 	}
 free_urb:
 	skb = (struct sk_buff *)urb->context;
-	zd_mac_tx_to_dev(skb, urb->status);
+	/*
+	 * grab 'usb' pointer before handing off the skb (since
+	 * it might be freed by zd_mac_tx_to_dev or mac80211)
+	 */
 	cb = (struct zd_tx_skb_control_block *)skb->cb;
 	usb = &zd_hw_mac(cb->hw)->chip.usb;
+	zd_mac_tx_to_dev(skb, urb->status);
 	free_tx_urb(usb, urb);
 	tx_dec_submitted_urbs(usb);
 	return;
-- 
cgit v0.10.2


From c0186078b78839a8bdb385fa07a816c2f348a49d Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 8 May 2008 11:34:05 +0800
Subject: iwlwifi: Fix frequency in rx_status fill

This patch fixes a bug in RX path, the frequency was wrongly set in the
ieee80211_rx_status. This bug led to an empty scan list in A band.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c
index d340683..62a3d8f 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.c
@@ -666,7 +666,7 @@ static void iwl3945_rx_reply_rx(struct iwl3945_priv *priv,
 	rx_status.flag = 0;
 	rx_status.mactime = le64_to_cpu(rx_end->timestamp);
 	rx_status.freq =
-		ieee80211_frequency_to_channel(le16_to_cpu(rx_hdr->channel));
+		ieee80211_channel_to_frequency(le16_to_cpu(rx_hdr->channel));
 	rx_status.band = (rx_hdr->phy_flags & RX_RES_PHY_FLAGS_BAND_24_MSK) ?
 				IEEE80211_BAND_2GHZ : IEEE80211_BAND_5GHZ;
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c
index 17f629f..bf19eb8 100644
--- a/drivers/net/wireless/iwlwifi/iwl-4965.c
+++ b/drivers/net/wireless/iwlwifi/iwl-4965.c
@@ -3978,7 +3978,7 @@ static void iwl4965_rx_reply_rx(struct iwl_priv *priv,
 
 	rx_status.mactime = le64_to_cpu(rx_start->timestamp);
 	rx_status.freq =
-		ieee80211_frequency_to_channel(le16_to_cpu(rx_start->channel));
+		ieee80211_channel_to_frequency(le16_to_cpu(rx_start->channel));
 	rx_status.band = (rx_start->phy_flags & RX_RES_PHY_FLAGS_BAND_24_MSK) ?
 				IEEE80211_BAND_2GHZ : IEEE80211_BAND_5GHZ;
 	rx_status.rate_idx =
-- 
cgit v0.10.2


From 36d16ae73becc5978fe22866e9ab66b509211afe Mon Sep 17 00:00:00 2001
From: Helmut Schaa <hschaa@suse.de>
Date: Thu, 8 May 2008 13:34:07 +0200
Subject: mac80211: fix association with some APs

Some APs refuse association if the supported rates contained in the
association request do not match its own supported rates. This patch
introduces a new function which builds the intersection between the AP's
supported rates and the client's supported rates to work around such
problems. The same approach is already used in ipw2200 for example.

Signed-off-by: Helmut Schaa <hschaa@suse.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a5e5c31..4adba09 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -665,6 +665,26 @@ static void ieee80211_authenticate(struct net_device *dev,
 	mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT);
 }
 
+static int ieee80211_compatible_rates(struct ieee80211_sta_bss *bss,
+				      struct ieee80211_supported_band *sband,
+				      u64 *rates)
+{
+	int i, j, count;
+	*rates = 0;
+	count = 0;
+	for (i = 0; i < bss->supp_rates_len; i++) {
+		int rate = (bss->supp_rates[i] & 0x7F) * 5;
+
+		for (j = 0; j < sband->n_bitrates; j++)
+			if (sband->bitrates[j].bitrate == rate) {
+				*rates |= BIT(j);
+				count++;
+				break;
+			}
+	}
+
+	return count;
+}
 
 static void ieee80211_send_assoc(struct net_device *dev,
 				 struct ieee80211_if_sta *ifsta)
@@ -673,11 +693,12 @@ static void ieee80211_send_assoc(struct net_device *dev,
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 	u8 *pos, *ies;
-	int i, len;
+	int i, len, count, rates_len, supp_rates_len;
 	u16 capab;
 	struct ieee80211_sta_bss *bss;
 	int wmm = 0;
 	struct ieee80211_supported_band *sband;
+	u64 rates = 0;
 
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
 			    sizeof(*mgmt) + 200 + ifsta->extra_ie_len +
@@ -740,24 +761,39 @@ static void ieee80211_send_assoc(struct net_device *dev,
 	*pos++ = ifsta->ssid_len;
 	memcpy(pos, ifsta->ssid, ifsta->ssid_len);
 
+	/* all supported rates should be added here but some APs
+	 * (e.g. D-Link DAP 1353 in b-only mode) don't like that
+	 * Therefore only add rates the AP supports */
+	rates_len = ieee80211_compatible_rates(bss, sband, &rates);
+	supp_rates_len = rates_len;
+	if (supp_rates_len > 8)
+		supp_rates_len = 8;
+
 	len = sband->n_bitrates;
-	if (len > 8)
-		len = 8;
-	pos = skb_put(skb, len + 2);
+	pos = skb_put(skb, supp_rates_len + 2);
 	*pos++ = WLAN_EID_SUPP_RATES;
-	*pos++ = len;
-	for (i = 0; i < len; i++) {
-		int rate = sband->bitrates[i].bitrate;
-		*pos++ = (u8) (rate / 5);
-	}
+	*pos++ = supp_rates_len;
 
-	if (sband->n_bitrates > len) {
-		pos = skb_put(skb, sband->n_bitrates - len + 2);
-		*pos++ = WLAN_EID_EXT_SUPP_RATES;
-		*pos++ = sband->n_bitrates - len;
-		for (i = len; i < sband->n_bitrates; i++) {
+	count = 0;
+	for (i = 0; i < sband->n_bitrates; i++) {
+		if (BIT(i) & rates) {
 			int rate = sband->bitrates[i].bitrate;
 			*pos++ = (u8) (rate / 5);
+			if (++count == 8)
+				break;
+		}
+	}
+
+	if (count == 8) {
+		pos = skb_put(skb, rates_len - count + 2);
+		*pos++ = WLAN_EID_EXT_SUPP_RATES;
+		*pos++ = rates_len - count;
+
+		for (i++; i < sband->n_bitrates; i++) {
+			if (BIT(i) & rates) {
+				int rate = sband->bitrates[i].bitrate;
+				*pos++ = (u8) (rate / 5);
+			}
 		}
 	}
 
-- 
cgit v0.10.2


From 6fc7431dc0775f21ad7a7a39c2ad0290291a56ea Mon Sep 17 00:00:00 2001
From: Masakazu Mokuno <mokuno@sm.sony.co.jp>
Date: Mon, 12 May 2008 13:50:28 +0900
Subject: PS3: gelic: fix memory leak

This fixes the bug that the I/O buffer is not freed at the driver removal.

Signed-off-by: Masakazu Mokuno <mokuno@sm.sony.co.jp>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/drivers/net/ps3_gelic_wireless.c b/drivers/net/ps3_gelic_wireless.c
index 0d32123..1dae1f2 100644
--- a/drivers/net/ps3_gelic_wireless.c
+++ b/drivers/net/ps3_gelic_wireless.c
@@ -2474,6 +2474,8 @@ static void gelic_wl_free(struct gelic_wl_info *wl)
 
 	pr_debug("%s: <-\n", __func__);
 
+	free_page((unsigned long)wl->buf);
+
 	pr_debug("%s: destroy queues\n", __func__);
 	destroy_workqueue(wl->eurus_cmd_queue);
 	destroy_workqueue(wl->event_queue);
-- 
cgit v0.10.2


From a4278e18e7e497b76781492d010035c3c36f7403 Mon Sep 17 00:00:00 2001
From: Pavel Roskin <proski@gnu.org>
Date: Mon, 12 May 2008 09:02:24 -0400
Subject: mac80211: add missing newlines in printk()

Signed-off-by: Pavel Roskin <proski@gnu.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 9c57b3a..1958bfb3 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1395,7 +1395,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 		padding = ((4 - subframe_len) & 0x3);
 		/* the last MSDU has no padding */
 		if (subframe_len > remaining) {
-			printk(KERN_DEBUG "%s: wrong buffer size", dev->name);
+			printk(KERN_DEBUG "%s: wrong buffer size\n", dev->name);
 			return RX_DROP_UNUSABLE;
 		}
 
@@ -1418,7 +1418,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 			eth = (struct ethhdr *) skb_pull(skb, ntohs(len) +
 							padding);
 			if (!eth) {
-				printk(KERN_DEBUG "%s: wrong buffer size ",
+				printk(KERN_DEBUG "%s: wrong buffer size\n",
 				       dev->name);
 				dev_kfree_skb(frame);
 				return RX_DROP_UNUSABLE;
@@ -1952,7 +1952,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 		if (!skb_new) {
 			if (net_ratelimit())
 				printk(KERN_DEBUG "%s: failed to copy "
-				       "multicast frame for %s",
+				       "multicast frame for %s\n",
 				       wiphy_name(local->hw.wiphy),
 				       prev->dev->name);
 			continue;
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 64faa3d..dc1598b 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -394,7 +394,8 @@ static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt)
 						 qd->handle);
 		if (!q->queues[i]) {
 			q->queues[i] = &noop_qdisc;
-			printk(KERN_ERR "%s child qdisc %i creation failed", dev->name, i);
+			printk(KERN_ERR "%s child qdisc %i creation failed\n",
+			       dev->name, i);
 		}
 	}
 
-- 
cgit v0.10.2


From 8388e3da34edb141362bb42811ee487dfec15525 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 12 May 2008 20:17:33 -0700
Subject: net: Set LL_MAX_HEADER properly for wireless.

Wireless networking, particularly with MESH enabled, has
quite strong requirements for link-layer header space.

Based upon some numbers and descriptions from Johannes Berg
we use 96 (same as AX25) for plain wireless, and with
mesh enabled we use 128.

In the process, simplify the cpp conditional logic here by
ordering the cases from those needing the most space down
to those needing the least.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7469017..a3fb57f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -93,14 +93,16 @@ struct wireless_dev;
  *	used.
  */
  
-#if !defined(CONFIG_AX25) && !defined(CONFIG_AX25_MODULE) && !defined(CONFIG_TR)
-#define LL_MAX_HEADER	32
+#if defined(CONFIG_WLAN_80211) || defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+# if defined(CONFIG_MAC80211_MESH)
+#  define LL_MAX_HEADER 128
+# else
+#  define LL_MAX_HEADER 96
+# endif
+#elif defined(CONFIG_TR)
+# define LL_MAX_HEADER 48
 #else
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
-#define LL_MAX_HEADER	96
-#else
-#define LL_MAX_HEADER	48
-#endif
+# define LL_MAX_HEADER 32
 #endif
 
 #if !defined(CONFIG_NET_IPIP) && !defined(CONFIG_NET_IPIP_MODULE) && \
-- 
cgit v0.10.2


From f5184d267c1aedb9b7a8cc44e08ff6b8d382c3b5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 12 May 2008 20:48:31 -0700
Subject: net: Allow netdevices to specify needed head/tailroom

This patch adds needed_headroom/needed_tailroom members to struct
net_device and updates many places that allocate skbs to use them. Not
all of them can be converted though, and I'm sure I missed some (I
mostly grepped for LL_RESERVED_SPACE).

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a3fb57f..b11e6e1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -246,11 +246,16 @@ struct hh_cache
  *
  * We could use other alignment values, but we must maintain the
  * relationship HH alignment <= LL alignment.
+ *
+ * LL_ALLOCATED_SPACE also takes into account the tailroom the device
+ * may need.
  */
 #define LL_RESERVED_SPACE(dev) \
-	(((dev)->hard_header_len&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
+	((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
 #define LL_RESERVED_SPACE_EXTRA(dev,extra) \
-	((((dev)->hard_header_len+extra)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
+	((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
+#define LL_ALLOCATED_SPACE(dev) \
+	((((dev)->hard_header_len+(dev)->needed_headroom+(dev)->needed_tailroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
 
 struct header_ops {
 	int	(*create) (struct sk_buff *skb, struct net_device *dev,
@@ -569,6 +574,13 @@ struct net_device
 	unsigned short		type;	/* interface hardware type	*/
 	unsigned short		hard_header_len;	/* hardware hdr length	*/
 
+	/* extra head- and tailroom the hardware may need, but not in all cases
+	 * can this be guaranteed, especially tailroom. Some cases also use
+	 * LL_MAX_HEADER instead to allocate the skb.
+	 */
+	unsigned short		needed_headroom;
+	unsigned short		needed_tailroom;
+
 	struct net_device	*master; /* Pointer to master device of a group,
 					  * which this device is member of.
 					  */
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index b04d643..8fb134d 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -419,7 +419,7 @@ static void arp_reply(struct sk_buff *skb)
 		return;
 
 	size = arp_hdr_len(skb->dev);
-	send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
+	send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev),
 			    LL_RESERVED_SPACE(np->dev));
 
 	if (!send_skb)
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 68d1544..7c9bb13 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -340,7 +340,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 		dev_hold(dev);
 
-		skb = sock_alloc_send_skb(sk, len+LL_RESERVED_SPACE(dev),
+		skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev),
 					  msg->msg_flags & MSG_DONTWAIT, &err);
 		if (skb==NULL)
 			goto out_unlock;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 68b72a7..418862f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -570,7 +570,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
 	 *	Allocate a buffer
 	 */
 
-	skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+	skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
 	if (skb == NULL)
 		return NULL;
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6250f42..2769dc4 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -292,7 +292,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	struct iphdr *pip;
 	struct igmpv3_report *pig;
 
-	skb = alloc_skb(size + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+	skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
 	if (skb == NULL)
 		return NULL;
 
@@ -653,7 +653,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 		return -1;
 	}
 
-	skb=alloc_skb(IGMP_SIZE+LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+	skb=alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
 	if (skb == NULL) {
 		ip_rt_put(rt);
 		return -1;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 89dee43..ed45037 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -710,14 +710,14 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
 	struct net_device *dev = d->dev;
 	struct sk_buff *skb;
 	struct bootp_pkt *b;
-	int hh_len = LL_RESERVED_SPACE(dev);
 	struct iphdr *h;
 
 	/* Allocate packet */
-	skb = alloc_skb(sizeof(struct bootp_pkt) + hh_len + 15, GFP_KERNEL);
+	skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15,
+			GFP_KERNEL);
 	if (!skb)
 		return;
-	skb_reserve(skb, hh_len);
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 	b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt));
 	memset(b, 0, sizeof(struct bootp_pkt));
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 11d7f75..fead049 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -322,7 +322,6 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 			unsigned int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	int hh_len;
 	struct iphdr *iph;
 	struct sk_buff *skb;
 	unsigned int iphlen;
@@ -336,13 +335,12 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 	if (flags&MSG_PROBE)
 		goto out;
 
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
-
-	skb = sock_alloc_send_skb(sk, length+hh_len+15,
-				  flags&MSG_DONTWAIT, &err);
+	skb = sock_alloc_send_skb(sk,
+				  length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
+				  flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto error;
-	skb_reserve(skb, hh_len);
+	skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0af2e05..48cdce9 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -780,7 +780,7 @@ slow_path:
 		 *	Allocate buffer.
 		 */
 
-		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
+		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
 			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
 			IP6_INC_STATS(ip6_dst_idev(skb->dst),
 				      IPSTATS_MIB_FRAGFAILS);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 54f91ef..fd632dd 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1411,7 +1411,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
 		     IPV6_TLV_PADN, 0 };
 
 	/* we assume size > sizeof(ra) here */
-	skb = sock_alloc_send_skb(sk, size + LL_RESERVED_SPACE(dev), 1, &err);
+	skb = sock_alloc_send_skb(sk, size + LL_ALLOCATED_SPACE(dev), 1, &err);
 
 	if (!skb)
 		return NULL;
@@ -1790,7 +1790,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 	payload_len = len + sizeof(ra);
 	full_len = sizeof(struct ipv6hdr) + payload_len;
 
-	skb = sock_alloc_send_skb(sk, LL_RESERVED_SPACE(dev) + full_len, 1, &err);
+	skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + full_len, 1, &err);
 
 	if (skb == NULL) {
 		rcu_read_lock();
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 2c74885..a55fc05 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -479,7 +479,7 @@ static void __ndisc_send(struct net_device *dev,
 
 	skb = sock_alloc_send_skb(sk,
 				  (MAX_HEADER + sizeof(struct ipv6hdr) +
-				   len + LL_RESERVED_SPACE(dev)),
+				   len + LL_ALLOCATED_SPACE(dev)),
 				  1, &err);
 	if (!skb) {
 		ND_PRINTK0(KERN_ERR
@@ -1521,7 +1521,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 
 	buff = sock_alloc_send_skb(sk,
 				   (MAX_HEADER + sizeof(struct ipv6hdr) +
-				    len + LL_RESERVED_SPACE(dev)),
+				    len + LL_ALLOCATED_SPACE(dev)),
 				   1, &err);
 	if (buff == NULL) {
 		ND_PRINTK0(KERN_ERR
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 396f0ea..232e0dc 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -609,7 +609,6 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6hdr *iph;
 	struct sk_buff *skb;
-	unsigned int hh_len;
 	int err;
 
 	if (length > rt->u.dst.dev->mtu) {
@@ -619,13 +618,12 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 	if (flags&MSG_PROBE)
 		goto out;
 
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
-
-	skb = sock_alloc_send_skb(sk, length+hh_len+15,
-				  flags&MSG_DONTWAIT, &err);
+	skb = sock_alloc_send_skb(sk,
+				  length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
+				  flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto error;
-	skb_reserve(skb, hh_len);
+	skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2507024..2cee87d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -743,7 +743,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
 	if (len > dev->mtu+reserve)
 		goto out_unlock;
 
-	skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
+	skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
 				msg->msg_flags & MSG_DONTWAIT, &err);
 	if (skb==NULL)
 		goto out_unlock;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 09cd9c0..3f964db 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -25,11 +25,11 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
 	struct dst_entry *dst = skb->dst;
 	int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev)
 		- skb_headroom(skb);
+	int ntail = dst->dev->needed_tailroom - skb_tailroom(skb);
 
-	if (nhead > 0)
-		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
+	if (nhead > 0 || ntail > 0)
+		return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC);
 
-	/* Check tail too... */
 	return 0;
 }
 
-- 
cgit v0.10.2


From f3994eceebf64cf356a82ffb2718ef538eb8d4f4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 12 May 2008 20:51:44 -0700
Subject: mac80211: assign needed_headroom/tailroom for netdevs

This assigns the netdev's needed_headroom/tailroom members to take
advantage of pre-allocated space for 802.11 headers.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 80954a5..06e88a5 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -54,6 +54,15 @@ int ieee80211_if_add(struct net_device *dev, const char *name,
 	if (!ndev)
 		return -ENOMEM;
 
+	ndev->needed_headroom = local->tx_headroom +
+				4*6 /* four MAC addresses */
+				+ 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */
+				+ 6 /* mesh */
+				+ 8 /* rfc1042/bridge tunnel */
+				- ETH_HLEN /* ethernet hard_header_len */
+				+ IEEE80211_ENCRYPT_HEADROOM;
+	ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM;
+
 	ret = dev_alloc_name(ndev, ndev->name);
 	if (ret < 0)
 		goto fail;
-- 
cgit v0.10.2


From 608961a5eca8d3c6bd07172febc27b5559408c5d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 12 May 2008 21:59:32 -0700
Subject: mac80211: Use skb_header_cloned() on TX path.

When skb_header_cloned() returns false you can change the
headers however you like.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 28d8bd5..1d7dd54 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1562,13 +1562,13 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	 * be cloned. This could happen, e.g., with Linux bridge code passing
 	 * us broadcast frames. */
 
-	if (head_need > 0 || skb_cloned(skb)) {
+	if (head_need > 0 || skb_header_cloned(skb)) {
 #if 0
 		printk(KERN_DEBUG "%s: need to reallocate buffer for %d bytes "
 		       "of headroom\n", dev->name, head_need);
 #endif
 
-		if (skb_cloned(skb))
+		if (skb_header_cloned(skb))
 			I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
 		else
 			I802_DEBUG_INC(local->tx_expand_skb_head);
-- 
cgit v0.10.2


From ff772b27e5f65c1a186e9f0741f0d00ef7002799 Mon Sep 17 00:00:00 2001
From: Jay Cliburn <jacliburn@bellsouth.net>
Date: Fri, 9 May 2008 22:12:06 -0500
Subject: atl1: add PHY power save mode

Using vendor-provided magic, add code to enter power save mode
on the PHY.  We'll need this for suspend and wake-on-lan.

Signed-off-by: Jay Cliburn <jacliburn@bellsouth.net>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c
index 0afe522..3beb44e 100644
--- a/drivers/net/atlx/atl1.c
+++ b/drivers/net/atlx/atl1.c
@@ -638,21 +638,18 @@ static s32 atl1_phy_leave_power_saving(struct atl1_hw *hw)
 }
 
 /*
- *TODO: do something or get rid of this
+ * Force the PHY into power saving mode using vendor magic.
  */
 #ifdef CONFIG_PM
-static s32 atl1_phy_enter_power_saving(struct atl1_hw *hw)
+static void atl1_phy_enter_power_saving(struct atl1_hw *hw)
 {
-/*    s32 ret_val;
- *    u16 phy_data;
- */
+	atl1_write_phy_reg(hw, MII_DBG_ADDR, 0);
+	atl1_write_phy_reg(hw, MII_DBG_DATA, 0x124E);
+	atl1_write_phy_reg(hw, MII_DBG_ADDR, 2);
+	atl1_write_phy_reg(hw, MII_DBG_DATA, 0x3000);
+	atl1_write_phy_reg(hw, MII_DBG_ADDR, 3);
+	atl1_write_phy_reg(hw, MII_DBG_DATA, 0);
 
-/*
-    ret_val = atl1_write_phy_reg(hw, ...);
-    ret_val = atl1_write_phy_reg(hw, ...);
-    ....
-*/
-	return 0;
 }
 #endif
 
diff --git a/drivers/net/atlx/atlx.h b/drivers/net/atlx/atlx.h
index 3be7c09..9672188 100644
--- a/drivers/net/atlx/atlx.h
+++ b/drivers/net/atlx/atlx.h
@@ -460,6 +460,9 @@ MODULE_VERSION(ATLX_DRIVER_VERSION);
 #define MII_ATLX_PSSR_100MBS		0x4000	/* 01=100Mbs */
 #define MII_ATLX_PSSR_1000MBS		0x8000	/* 10=1000Mbs */
 
+#define MII_DBG_ADDR			0x1D
+#define MII_DBG_DATA			0x1E
+
 /* PCI Command Register Bit Definitions */
 #define PCI_REG_COMMAND			0x04	/* PCI Command Register */
 #define CMD_IO_SPACE			0x0001
-- 
cgit v0.10.2


From 08e0f1dc8388b3e134c714672c59edc2a7059430 Mon Sep 17 00:00:00 2001
From: Jay Cliburn <jacliburn@bellsouth.net>
Date: Fri, 9 May 2008 22:12:07 -0500
Subject: atl1: fix broken suspend and resume

Fix atl1_suspend() and atl1_resume() so they actually work.  We'll use
the suspend function for wake-on-lan in addition to just suspending.

Signed-off-by: Jay Cliburn <jacliburn@bellsouth.net>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c
index 3beb44e..12fb3e5 100644
--- a/drivers/net/atlx/atl1.c
+++ b/drivers/net/atlx/atl1.c
@@ -2781,64 +2781,93 @@ static int atl1_suspend(struct pci_dev *pdev, pm_message_t state)
 	struct atl1_hw *hw = &adapter->hw;
 	u32 ctrl = 0;
 	u32 wufc = adapter->wol;
+	u32 val;
+	int retval;
+	u16 speed;
+	u16 duplex;
 
 	netif_device_detach(netdev);
 	if (netif_running(netdev))
 		atl1_down(adapter);
 
+	retval = pci_save_state(pdev);
+	if (retval)
+		return retval;
+
 	atl1_read_phy_reg(hw, MII_BMSR, (u16 *) & ctrl);
 	atl1_read_phy_reg(hw, MII_BMSR, (u16 *) & ctrl);
-	if (ctrl & BMSR_LSTATUS)
+	val = ctrl & BMSR_LSTATUS;
+	if (val)
 		wufc &= ~ATLX_WUFC_LNKC;
 
-	/* reduce speed to 10/100M */
-	if (wufc) {
-		atl1_phy_enter_power_saving(hw);
-		/* if resume, let driver to re- setup link */
-		hw->phy_configured = false;
-		atl1_set_mac_addr(hw);
-		atlx_set_multi(netdev);
+	if (val && wufc) {
+		val = atl1_get_speed_and_duplex(hw, &speed, &duplex);
+		if (val) {
+			if (netif_msg_ifdown(adapter))
+				dev_printk(KERN_DEBUG, &pdev->dev,
+					"error getting speed/duplex\n");
+			goto disable_wol;
+		}
 
 		ctrl = 0;
-		/* turn on magic packet wol */
-		if (wufc & ATLX_WUFC_MAG)
-			ctrl = WOL_MAGIC_EN | WOL_MAGIC_PME_EN;
 
-		/* turn on Link change WOL */
-		if (wufc & ATLX_WUFC_LNKC)
-			ctrl |= (WOL_LINK_CHG_EN | WOL_LINK_CHG_PME_EN);
+		/* enable magic packet WOL */
+		if (wufc & ATLX_WUFC_MAG)
+			ctrl |= (WOL_MAGIC_EN | WOL_MAGIC_PME_EN);
 		iowrite32(ctrl, hw->hw_addr + REG_WOL_CTRL);
-
-		/* turn on all-multi mode if wake on multicast is enabled */
-		ctrl = ioread32(hw->hw_addr + REG_MAC_CTRL);
-		ctrl &= ~MAC_CTRL_DBG;
-		ctrl &= ~MAC_CTRL_PROMIS_EN;
-		if (wufc & ATLX_WUFC_MC)
-			ctrl |= MAC_CTRL_MC_ALL_EN;
-		else
-			ctrl &= ~MAC_CTRL_MC_ALL_EN;
-
-		/* turn on broadcast mode if wake on-BC is enabled */
-		if (wufc & ATLX_WUFC_BC)
+		ioread32(hw->hw_addr + REG_WOL_CTRL);
+
+		/* configure the mac */
+		ctrl = MAC_CTRL_RX_EN;
+		ctrl |= ((u32)((speed == SPEED_1000) ? MAC_CTRL_SPEED_1000 :
+			MAC_CTRL_SPEED_10_100) << MAC_CTRL_SPEED_SHIFT);
+		if (duplex == FULL_DUPLEX)
+			ctrl |= MAC_CTRL_DUPLX;
+		ctrl |= (((u32)adapter->hw.preamble_len &
+			MAC_CTRL_PRMLEN_MASK) << MAC_CTRL_PRMLEN_SHIFT);
+		if (adapter->vlgrp)
+			ctrl |= MAC_CTRL_RMV_VLAN;
+		if (wufc & ATLX_WUFC_MAG)
 			ctrl |= MAC_CTRL_BC_EN;
-		else
-			ctrl &= ~MAC_CTRL_BC_EN;
-
-		/* enable RX */
-		ctrl |= MAC_CTRL_RX_EN;
 		iowrite32(ctrl, hw->hw_addr + REG_MAC_CTRL);
-		pci_enable_wake(pdev, PCI_D3hot, 1);
-		pci_enable_wake(pdev, PCI_D3cold, 1);
-	} else {
-		iowrite32(0, hw->hw_addr + REG_WOL_CTRL);
-		pci_enable_wake(pdev, PCI_D3hot, 0);
-		pci_enable_wake(pdev, PCI_D3cold, 0);
+		ioread32(hw->hw_addr + REG_MAC_CTRL);
+
+		/* poke the PHY */
+		ctrl = ioread32(hw->hw_addr + REG_PCIE_PHYMISC);
+		ctrl |= PCIE_PHYMISC_FORCE_RCV_DET;
+		iowrite32(ctrl, hw->hw_addr + REG_PCIE_PHYMISC);
+		ioread32(hw->hw_addr + REG_PCIE_PHYMISC);
+
+		pci_enable_wake(pdev, pci_choose_state(pdev, state), 1);
+		goto exit;
 	}
 
-	pci_save_state(pdev);
+	if (!val && wufc) {
+		ctrl |= (WOL_LINK_CHG_EN | WOL_LINK_CHG_PME_EN);
+		iowrite32(ctrl, hw->hw_addr + REG_WOL_CTRL);
+		ioread32(hw->hw_addr + REG_WOL_CTRL);
+		iowrite32(0, hw->hw_addr + REG_MAC_CTRL);
+		ioread32(hw->hw_addr + REG_MAC_CTRL);
+		hw->phy_configured = false;
+		pci_enable_wake(pdev, pci_choose_state(pdev, state), 1);
+		goto exit;
+	}
+
+disable_wol:
+	iowrite32(0, hw->hw_addr + REG_WOL_CTRL);
+	ioread32(hw->hw_addr + REG_WOL_CTRL);
+	ctrl = ioread32(hw->hw_addr + REG_PCIE_PHYMISC);
+	ctrl |= PCIE_PHYMISC_FORCE_RCV_DET;
+	iowrite32(ctrl, hw->hw_addr + REG_PCIE_PHYMISC);
+	ioread32(hw->hw_addr + REG_PCIE_PHYMISC);
+	atl1_phy_enter_power_saving(hw);
+	hw->phy_configured = false;
+	pci_enable_wake(pdev, pci_choose_state(pdev, state), 0);
+exit:
+	if (netif_running(netdev))
+		pci_disable_msi(adapter->pdev);
 	pci_disable_device(pdev);
-
-	pci_set_power_state(pdev, PCI_D3hot);
+	pci_set_power_state(pdev, pci_choose_state(pdev, state));
 
 	return 0;
 }
@@ -2852,20 +2881,26 @@ static int atl1_resume(struct pci_dev *pdev)
 	pci_set_power_state(pdev, PCI_D0);
 	pci_restore_state(pdev);
 
-	/* FIXME: check and handle */
 	err = pci_enable_device(pdev);
+	if (err) {
+		if (netif_msg_ifup(adapter))
+			dev_printk(KERN_DEBUG, &pdev->dev,
+				"error enabling pci device\n");
+		return err;
+	}
+
+	pci_set_master(pdev);
+	iowrite32(0, adapter->hw.hw_addr + REG_WOL_CTRL);
 	pci_enable_wake(pdev, PCI_D3hot, 0);
 	pci_enable_wake(pdev, PCI_D3cold, 0);
 
-	iowrite32(0, adapter->hw.hw_addr + REG_WOL_CTRL);
-	atl1_reset(adapter);
+	atl1_reset_hw(&adapter->hw);
+	adapter->cmb.cmb->int_stats = 0;
 
 	if (netif_running(netdev))
 		atl1_up(adapter);
 	netif_device_attach(netdev);
 
-	atl1_via_workaround(adapter);
-
 	return 0;
 }
 #else
-- 
cgit v0.10.2


From bf455a2247c6abe7d0debfbf2974514b5144ed4d Mon Sep 17 00:00:00 2001
From: Jay Cliburn <jacliburn@bellsouth.net>
Date: Fri, 9 May 2008 22:12:08 -0500
Subject: atl1: add shutdown callback

Add a shutdown callback that points to atl1_suspend().  This, along
with a working suspend function, fixes wake-on-lan.

Tested-by: Per Olofsson <pelle@dsv.su.se>
Signed-off-by: Jay Cliburn <jacliburn@bellsouth.net>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c
index 12fb3e5..b7092a3 100644
--- a/drivers/net/atlx/atl1.c
+++ b/drivers/net/atlx/atl1.c
@@ -36,7 +36,6 @@
  * A very incomplete list of things that need to be dealt with:
  *
  * TODO:
- * Wake on LAN.
  * Add more ethtool functions.
  * Fix abstruse irq enable/disable condition described here:
  *	http://marc.theaimsgroup.com/?l=linux-netdev&m=116398508500553&w=2
@@ -2908,6 +2907,13 @@ static int atl1_resume(struct pci_dev *pdev)
 #define atl1_resume NULL
 #endif
 
+static void atl1_shutdown(struct pci_dev *pdev)
+{
+#ifdef CONFIG_PM
+	atl1_suspend(pdev, PMSG_SUSPEND);
+#endif
+}
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void atl1_poll_controller(struct net_device *netdev)
 {
@@ -3154,7 +3160,8 @@ static struct pci_driver atl1_driver = {
 	.probe = atl1_probe,
 	.remove = __devexit_p(atl1_remove),
 	.suspend = atl1_suspend,
-	.resume = atl1_resume
+	.resume = atl1_resume,
+	.shutdown = atl1_shutdown
 };
 
 /*
-- 
cgit v0.10.2


From e8f720fdec08daa669f46c8d76da0714f6872ccc Mon Sep 17 00:00:00 2001
From: Jay Cliburn <jacliburn@bellsouth.net>
Date: Fri, 9 May 2008 22:12:09 -0500
Subject: atl1: bump version number

atl1-2.1.3.

Signed-off-by: Jay Cliburn <jacliburn@bellsouth.net>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c
index b7092a3..9c2394d 100644
--- a/drivers/net/atlx/atl1.c
+++ b/drivers/net/atlx/atl1.c
@@ -1,7 +1,7 @@
 /*
  * Copyright(c) 2005 - 2006 Attansic Corporation. All rights reserved.
  * Copyright(c) 2006 - 2007 Chris Snook <csnook@redhat.com>
- * Copyright(c) 2006 Jay Cliburn <jcliburn@gmail.com>
+ * Copyright(c) 2006 - 2008 Jay Cliburn <jcliburn@gmail.com>
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
diff --git a/drivers/net/atlx/atl1.h b/drivers/net/atlx/atl1.h
index 51893d6..a5015b1 100644
--- a/drivers/net/atlx/atl1.h
+++ b/drivers/net/atlx/atl1.h
@@ -1,7 +1,7 @@
 /*
  * Copyright(c) 2005 - 2006 Attansic Corporation. All rights reserved.
  * Copyright(c) 2006 - 2007 Chris Snook <csnook@redhat.com>
- * Copyright(c) 2006 Jay Cliburn <jcliburn@gmail.com>
+ * Copyright(c) 2006 - 2008 Jay Cliburn <jcliburn@gmail.com>
  *
  * Derived from Intel e1000 driver
  * Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved.
diff --git a/drivers/net/atlx/atlx.c b/drivers/net/atlx/atlx.c
index f06b854..b3e7fcf 100644
--- a/drivers/net/atlx/atlx.c
+++ b/drivers/net/atlx/atlx.c
@@ -2,7 +2,7 @@
  *
  * Copyright(c) 2005 - 2006 Attansic Corporation. All rights reserved.
  * Copyright(c) 2006 - 2007 Chris Snook <csnook@redhat.com>
- * Copyright(c) 2006 Jay Cliburn <jcliburn@gmail.com>
+ * Copyright(c) 2006 - 2008 Jay Cliburn <jcliburn@gmail.com>
  * Copyright(c) 2007 Atheros Corporation. All rights reserved.
  *
  * Derived from Intel e1000 driver
diff --git a/drivers/net/atlx/atlx.h b/drivers/net/atlx/atlx.h
index 9672188..297a03d 100644
--- a/drivers/net/atlx/atlx.h
+++ b/drivers/net/atlx/atlx.h
@@ -2,7 +2,7 @@
  *
  * Copyright(c) 2005 - 2006 Attansic Corporation. All rights reserved.
  * Copyright(c) 2006 - 2007 Chris Snook <csnook@redhat.com>
- * Copyright(c) 2006 Jay Cliburn <jcliburn@gmail.com>
+ * Copyright(c) 2006 - 2008 Jay Cliburn <jcliburn@gmail.com>
  * Copyright(c) 2007 Atheros Corporation. All rights reserved.
  *
  * Derived from Intel e1000 driver
@@ -29,7 +29,7 @@
 #include <linux/module.h>
 #include <linux/types.h>
 
-#define ATLX_DRIVER_VERSION "2.1.1"
+#define ATLX_DRIVER_VERSION "2.1.3"
 MODULE_AUTHOR("Xiong Huang <xiong.huang@atheros.com>, \
 	Chris Snook <csnook@redhat.com>, Jay Cliburn <jcliburn@gmail.com>");
 MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From 0f7229dde3f2b5373e26e7d7dd35012bd975e452 Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Fri, 9 May 2008 02:16:19 +0200
Subject: myri10ge: update firmware headers

Update myri10ge firmware headers.

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Andrew Gallatin <gallatin@myri.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/myri10ge/myri10ge_mcp.h b/drivers/net/myri10ge/myri10ge_mcp.h
index 58e5717..fdbeeee 100644
--- a/drivers/net/myri10ge/myri10ge_mcp.h
+++ b/drivers/net/myri10ge/myri10ge_mcp.h
@@ -10,7 +10,7 @@ struct mcp_dma_addr {
 	__be32 low;
 };
 
-/* 4 Bytes.  8 Bytes for NDIS drivers. */
+/* 4 Bytes */
 struct mcp_slot {
 	__sum16 checksum;
 	__be16 length;
@@ -144,6 +144,7 @@ enum myri10ge_mcp_cmd_type {
 	 * a power of 2 number of entries.  */
 
 	MXGEFW_CMD_SET_INTRQ_SIZE,	/* in bytes */
+#define MXGEFW_CMD_SET_INTRQ_SIZE_FLAG_NO_STRICT_SIZE_CHECK  (1 << 31)
 
 	/* command to bring ethernet interface up.  Above parameters
 	 * (plus mtu & mac address) must have been exchanged prior
@@ -221,10 +222,14 @@ enum myri10ge_mcp_cmd_type {
 	MXGEFW_CMD_GET_MAX_RSS_QUEUES,
 	MXGEFW_CMD_ENABLE_RSS_QUEUES,
 	/* data0 = number of slices n (0, 1, ..., n-1) to enable
-	 * data1 = interrupt mode. 0=share one INTx/MSI, 1=use one MSI-X per queue.
+	 * data1 = interrupt mode.
+	 * 0=share one INTx/MSI, 1=use one MSI-X per queue.
 	 * If all queues share one interrupt, the driver must have set
 	 * RSS_SHARED_INTERRUPT_DMA before enabling queues.
 	 */
+#define MXGEFW_SLICE_INTR_MODE_SHARED 0
+#define MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE 1
+
 	MXGEFW_CMD_GET_RSS_SHARED_INTERRUPT_MASK_OFFSET,
 	MXGEFW_CMD_SET_RSS_SHARED_INTERRUPT_DMA,
 	/* data0, data1 = bus address lsw, msw */
@@ -241,10 +246,14 @@ enum myri10ge_mcp_cmd_type {
 	 * 0: disable rss.  nic does not distribute receive packets.
 	 * 1: enable rss.  nic distributes receive packets among queues.
 	 * data1 = hash type
-	 * 1: IPV4
-	 * 2: TCP_IPV4
-	 * 3: IPV4 | TCP_IPV4
+	 * 1: IPV4            (required by RSS)
+	 * 2: TCP_IPV4        (required by RSS)
+	 * 3: IPV4 | TCP_IPV4 (required by RSS)
+	 * 4: source port
 	 */
+#define MXGEFW_RSS_HASH_TYPE_IPV4      0x1
+#define MXGEFW_RSS_HASH_TYPE_TCP_IPV4  0x2
+#define MXGEFW_RSS_HASH_TYPE_SRC_PORT  0x4
 
 	MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE,
 	/* Return data = the max. size of the entire headers of a IPv6 TSO packet.
@@ -260,6 +269,8 @@ enum myri10ge_mcp_cmd_type {
 	 * 0: Linux/FreeBSD style (NIC default)
 	 * 1: NDIS/NetBSD style
 	 */
+#define MXGEFW_TSO_MODE_LINUX  0
+#define MXGEFW_TSO_MODE_NDIS   1
 
 	MXGEFW_CMD_MDIO_READ,
 	/* data0 = dev_addr (PMA/PMD or PCS ...), data1 = register/addr */
@@ -286,6 +297,38 @@ enum myri10ge_mcp_cmd_type {
 	/* Return data = NIC memory offset of mcp_vpump_public_global */
 	MXGEFW_CMD_RESET_VPUMP,
 	/* Resets the VPUMP state */
+
+	MXGEFW_CMD_SET_RSS_MCP_SLOT_TYPE,
+	/* data0 = mcp_slot type to use.
+	 * 0 = the default 4B mcp_slot
+	 * 1 = 8B mcp_slot_8
+	 */
+#define MXGEFW_RSS_MCP_SLOT_TYPE_MIN        0
+#define MXGEFW_RSS_MCP_SLOT_TYPE_WITH_HASH  1
+
+	MXGEFW_CMD_SET_THROTTLE_FACTOR,
+	/* set the throttle factor for ethp_z8e
+	 * data0 = throttle_factor
+	 * throttle_factor = 256 * pcie-raw-speed / tx_speed
+	 * tx_speed = 256 * pcie-raw-speed / throttle_factor
+	 *
+	 * For PCI-E x8: pcie-raw-speed == 16Gb/s
+	 * For PCI-E x4: pcie-raw-speed == 8Gb/s
+	 *
+	 * ex1: throttle_factor == 0x1a0 (416), tx_speed == 1.23GB/s == 9.846 Gb/s
+	 * ex2: throttle_factor == 0x200 (512), tx_speed == 1.0GB/s == 8 Gb/s
+	 *
+	 * with tx_boundary == 2048, max-throttle-factor == 8191 => min-speed == 500Mb/s
+	 * with tx_boundary == 4096, max-throttle-factor == 4095 => min-speed == 1Gb/s
+	 */
+
+	MXGEFW_CMD_VPUMP_UP,
+	/* Allocates VPump Connection, Send Request and Zero copy buffer address tables */
+	MXGEFW_CMD_GET_VPUMP_CLK,
+	/* Get the lanai clock */
+
+	MXGEFW_CMD_GET_DCA_OFFSET,
+	/* offset of dca control for WDMAs */
 };
 
 enum myri10ge_mcp_cmd_status {
@@ -302,7 +345,8 @@ enum myri10ge_mcp_cmd_status {
 	MXGEFW_CMD_ERROR_UNALIGNED,
 	MXGEFW_CMD_ERROR_NO_MDIO,
 	MXGEFW_CMD_ERROR_XFP_FAILURE,
-	MXGEFW_CMD_ERROR_XFP_ABSENT
+	MXGEFW_CMD_ERROR_XFP_ABSENT,
+	MXGEFW_CMD_ERROR_BAD_PCIE_LINK
 };
 
 #define MXGEFW_OLD_IRQ_DATA_LEN 40
diff --git a/drivers/net/myri10ge/myri10ge_mcp_gen_header.h b/drivers/net/myri10ge/myri10ge_mcp_gen_header.h
index 16a810d..07d65c2 100644
--- a/drivers/net/myri10ge/myri10ge_mcp_gen_header.h
+++ b/drivers/net/myri10ge/myri10ge_mcp_gen_header.h
@@ -1,30 +1,6 @@
 #ifndef __MYRI10GE_MCP_GEN_HEADER_H__
 #define __MYRI10GE_MCP_GEN_HEADER_H__
 
-/* this file define a standard header used as a first entry point to
- * exchange information between firmware/driver and driver.  The
- * header structure can be anywhere in the mcp. It will usually be in
- * the .data section, because some fields needs to be initialized at
- * compile time.
- * The 32bit word at offset MX_HEADER_PTR_OFFSET in the mcp must
- * contains the location of the header.
- *
- * Typically a MCP will start with the following:
- * .text
- * .space 52    ! to help catch MEMORY_INT errors
- * bt start     ! jump to real code
- * nop
- * .long _gen_mcp_header
- *
- * The source will have a definition like:
- *
- * mcp_gen_header_t gen_mcp_header = {
- * .header_length = sizeof(mcp_gen_header_t),
- * .mcp_type = MCP_TYPE_XXX,
- * .version = "something $Id: mcp_gen_header.h,v 1.2 2006/05/13 10:04:35 bgoglin Exp $",
- * .mcp_globals = (unsigned)&Globals
- * };
- */
 
 #define MCP_HEADER_PTR_OFFSET  0x3c
 
@@ -32,13 +8,14 @@
 #define MCP_TYPE_PCIE 0x70636965	/* "PCIE" pcie-only MCP */
 #define MCP_TYPE_ETH 0x45544820	/* "ETH " */
 #define MCP_TYPE_MCP0 0x4d435030	/* "MCP0" */
+#define MCP_TYPE_DFLT 0x20202020	/* "    " */
 
 struct mcp_gen_header {
 	/* the first 4 fields are filled at compile time */
 	unsigned header_length;
 	__be32 mcp_type;
 	char version[128];
-	unsigned mcp_globals;	/* pointer to mcp-type specific structure */
+	unsigned mcp_private;	/* pointer to mcp-type specific structure */
 
 	/* filled by the MCP at run-time */
 	unsigned sram_size;
@@ -53,6 +30,18 @@ struct mcp_gen_header {
 	 *
 	 * Never remove any field.  Keep everything naturally align.
 	 */
+
+	/* Specifies if the running mcp is mcp0, 1, or 2. */
+	unsigned char mcp_index;
+	unsigned char disable_rabbit;
+	unsigned char unaligned_tlp;
+	unsigned char pad1;
+	unsigned counters_addr;
+	unsigned copy_block_info;	/* for small mcps loaded with "lload -d" */
+	unsigned short handoff_id_major;	/* must be equal */
+	unsigned short handoff_id_caps;	/* bitfield: new mcp must have superset */
+	unsigned msix_table_addr;	/* start address of msix table in firmware */
+	/* 8 */
 };
 
 #endif				/* __MYRI10GE_MCP_GEN_HEADER_H__ */
-- 
cgit v0.10.2


From d1ce3a0f1a07b48e16ebbc71886086779b52f630 Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Fri, 9 May 2008 02:16:53 +0200
Subject: myri10ge: fix module parameter descriptions

Remove useless linebreaks at the end of MODULE_PARM_DESC
and fix the description of myri10ge_lro_max_pkts.

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Andrew Gallatin <gallatin@myri.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index ef63c8d..162c624 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -228,58 +228,58 @@ static char *myri10ge_fw_aligned = "myri10ge_eth_z8e.dat";
 
 static char *myri10ge_fw_name = NULL;
 module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name\n");
+MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name");
 
 static int myri10ge_ecrc_enable = 1;
 module_param(myri10ge_ecrc_enable, int, S_IRUGO);
-MODULE_PARM_DESC(myri10ge_ecrc_enable, "Enable Extended CRC on PCI-E\n");
+MODULE_PARM_DESC(myri10ge_ecrc_enable, "Enable Extended CRC on PCI-E");
 
 static int myri10ge_max_intr_slots = 1024;
 module_param(myri10ge_max_intr_slots, int, S_IRUGO);
-MODULE_PARM_DESC(myri10ge_max_intr_slots, "Interrupt queue slots\n");
+MODULE_PARM_DESC(myri10ge_max_intr_slots, "Interrupt queue slots");
 
 static int myri10ge_small_bytes = -1;	/* -1 == auto */
 module_param(myri10ge_small_bytes, int, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(myri10ge_small_bytes, "Threshold of small packets\n");
+MODULE_PARM_DESC(myri10ge_small_bytes, "Threshold of small packets");
 
 static int myri10ge_msi = 1;	/* enable msi by default */
 module_param(myri10ge_msi, int, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(myri10ge_msi, "Enable Message Signalled Interrupts\n");
+MODULE_PARM_DESC(myri10ge_msi, "Enable Message Signalled Interrupts");
 
 static int myri10ge_intr_coal_delay = 75;
 module_param(myri10ge_intr_coal_delay, int, S_IRUGO);
-MODULE_PARM_DESC(myri10ge_intr_coal_delay, "Interrupt coalescing delay\n");
+MODULE_PARM_DESC(myri10ge_intr_coal_delay, "Interrupt coalescing delay");
 
 static int myri10ge_flow_control = 1;
 module_param(myri10ge_flow_control, int, S_IRUGO);
-MODULE_PARM_DESC(myri10ge_flow_control, "Pause parameter\n");
+MODULE_PARM_DESC(myri10ge_flow_control, "Pause parameter");
 
 static int myri10ge_deassert_wait = 1;
 module_param(myri10ge_deassert_wait, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(myri10ge_deassert_wait,
-		 "Wait when deasserting legacy interrupts\n");
+		 "Wait when deasserting legacy interrupts");
 
 static int myri10ge_force_firmware = 0;
 module_param(myri10ge_force_firmware, int, S_IRUGO);
 MODULE_PARM_DESC(myri10ge_force_firmware,
-		 "Force firmware to assume aligned completions\n");
+		 "Force firmware to assume aligned completions");
 
 static int myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN;
 module_param(myri10ge_initial_mtu, int, S_IRUGO);
-MODULE_PARM_DESC(myri10ge_initial_mtu, "Initial MTU\n");
+MODULE_PARM_DESC(myri10ge_initial_mtu, "Initial MTU");
 
 static int myri10ge_napi_weight = 64;
 module_param(myri10ge_napi_weight, int, S_IRUGO);
-MODULE_PARM_DESC(myri10ge_napi_weight, "Set NAPI weight\n");
+MODULE_PARM_DESC(myri10ge_napi_weight, "Set NAPI weight");
 
 static int myri10ge_watchdog_timeout = 1;
 module_param(myri10ge_watchdog_timeout, int, S_IRUGO);
-MODULE_PARM_DESC(myri10ge_watchdog_timeout, "Set watchdog timeout\n");
+MODULE_PARM_DESC(myri10ge_watchdog_timeout, "Set watchdog timeout");
 
 static int myri10ge_max_irq_loops = 1048576;
 module_param(myri10ge_max_irq_loops, int, S_IRUGO);
 MODULE_PARM_DESC(myri10ge_max_irq_loops,
-		 "Set stuck legacy IRQ detection threshold\n");
+		 "Set stuck legacy IRQ detection threshold");
 
 #define MYRI10GE_MSG_DEFAULT NETIF_MSG_LINK
 
@@ -289,21 +289,22 @@ MODULE_PARM_DESC(myri10ge_debug, "Debug level (0=none,...,16=all)");
 
 static int myri10ge_lro = 1;
 module_param(myri10ge_lro, int, S_IRUGO);
-MODULE_PARM_DESC(myri10ge_lro, "Enable large receive offload\n");
+MODULE_PARM_DESC(myri10ge_lro, "Enable large receive offload");
 
 static int myri10ge_lro_max_pkts = MYRI10GE_LRO_MAX_PKTS;
 module_param(myri10ge_lro_max_pkts, int, S_IRUGO);
-MODULE_PARM_DESC(myri10ge_lro, "Number of LRO packets to be aggregated\n");
+MODULE_PARM_DESC(myri10ge_lro_max_pkts,
+		 "Number of LRO packets to be aggregated");
 
 static int myri10ge_fill_thresh = 256;
 module_param(myri10ge_fill_thresh, int, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(myri10ge_fill_thresh, "Number of empty rx slots allowed\n");
+MODULE_PARM_DESC(myri10ge_fill_thresh, "Number of empty rx slots allowed");
 
 static int myri10ge_reset_recover = 1;
 
 static int myri10ge_wcfifo = 0;
 module_param(myri10ge_wcfifo, int, S_IRUGO);
-MODULE_PARM_DESC(myri10ge_wcfifo, "Enable WC Fifo when WC is enabled\n");
+MODULE_PARM_DESC(myri10ge_wcfifo, "Enable WC Fifo when WC is enabled");
 
 #define MYRI10GE_FW_OFFSET 1024*1024
 #define MYRI10GE_HIGHPART_TO_U32(X) \
-- 
cgit v0.10.2


From d93ca2a453f8e5734359267866ab4f3341aa8749 Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Fri, 9 May 2008 02:17:16 +0200
Subject: myri10ge: increase and fix handoff timeout

Increase the handoff timeout to 512ms so as to give the aeluros based
NICs sufficient time to handoff without relying on the msleep() being
sloppy, and accidentally sleeping way longer than the 20ms we specified
in 20 separate 1ms sleeps.

Fix typo in the handoff sleep delay, which made it additive, not
exponential.

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Andrew Gallatin <gallatin@myri.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 162c624..ad6c619 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -682,8 +682,8 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp)
 	msleep(1);
 	mb();
 	i = 0;
-	while (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA && i < 20) {
-		msleep(1);
+	while (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA && i < 9) {
+		msleep(1 << i);
 		i++;
 	}
 	if (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA) {
-- 
cgit v0.10.2


From f8fd57c11159d89d0d9cd624eafad41c680e8f6e Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Fri, 9 May 2008 02:17:37 +0200
Subject: myri10ge: properly align scratch buffers

Properly align scratch buffers when making boot commands.

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Andrew Gallatin <gallatin@myri.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index ad6c619..3f871c4 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -443,7 +443,7 @@ abort:
 static void myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
 {
 	char __iomem *submit;
-	__be32 buf[16];
+	__be32 buf[16] __attribute__ ((__aligned__(8)));
 	u32 dma_low, dma_high;
 	int i;
 
@@ -613,7 +613,7 @@ static int myri10ge_adopt_running_firmware(struct myri10ge_priv *mgp)
 static int myri10ge_load_firmware(struct myri10ge_priv *mgp)
 {
 	char __iomem *submit;
-	__be32 buf[16];
+	__be32 buf[16] __attribute__ ((__aligned__(8)));
 	u32 dma_low, dma_high, size;
 	int status, i;
 	struct myri10ge_cmd cmd;
-- 
cgit v0.10.2


From c0bf8801535d45df3597839edf864e24f60a4188 Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Fri, 9 May 2008 02:18:24 +0200
Subject: myri10ge: report FIBER in ethtool for XFP based NIC

Make ethtool report FIBRE as the port type for XFP (and quad ribbon
fiber) based NICs.  Don't bother to poke around and try to find out
what is in the XFP cage, since Linux does not have separate media
types for -SR, -LR, etc.

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Andrew Gallatin <gallatin@myri.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 3f871c4..4a65e41 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -205,6 +205,7 @@ struct myri10ge_priv {
 	int pause;
 	char *fw_name;
 	char eeprom_strings[MYRI10GE_EEPROM_STRINGS_SIZE];
+	char *product_code_string;
 	char fw_version[128];
 	int fw_ver_major;
 	int fw_ver_minor;
@@ -421,6 +422,10 @@ static int myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
 				ptr += 1;
 			}
 		}
+		if (memcmp(ptr, "PC=", 3) == 0) {
+			ptr += 3;
+			mgp->product_code_string = ptr;
+		}
 		if (memcmp((const void *)ptr, "SN=", 3) == 0) {
 			ptr += 3;
 			mgp->serial_number = simple_strtoul(ptr, &ptr, 10);
@@ -1304,9 +1309,39 @@ static irqreturn_t myri10ge_intr(int irq, void *arg)
 static int
 myri10ge_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
 {
+	struct myri10ge_priv *mgp = netdev_priv(netdev);
+	char *ptr;
+	int i;
+
 	cmd->autoneg = AUTONEG_DISABLE;
 	cmd->speed = SPEED_10000;
 	cmd->duplex = DUPLEX_FULL;
+
+	/*
+	 * parse the product code to deterimine the interface type
+	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
+	 * after the 3rd dash in the driver's cached copy of the
+	 * EEPROM's product code string.
+	 */
+	ptr = mgp->product_code_string;
+	if (ptr == NULL) {
+		printk(KERN_ERR "myri10ge: %s: Missing product code\n",
+			netdev->name);
+		return 0;
+	}
+	for (i = 0; i < 3; i++, ptr++) {
+		ptr = strchr(ptr, '-');
+		if (ptr == NULL) {
+			printk(KERN_ERR "myri10ge: %s: Invalid product "
+			       "code %s\n", netdev->name,
+			       mgp->product_code_string);
+			return 0;
+		}
+	}
+	if (*ptr == 'R' || *ptr == 'Q') {
+		/* We've found either an XFP or quad ribbon fiber */
+		cmd->port = PORT_FIBRE;
+	}
 	return 0;
 }
 
-- 
cgit v0.10.2


From bd2db0cf2411ebc081d45bde1b7c6cf726b832f2 Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Fri, 9 May 2008 02:18:45 +0200
Subject: myri10ge: add barrier in myri10ge_send_cmd

Add a memory barrier (mb()) in the udelay() loop in myri10ge_send_cmd().
Without the barrier, some MIPS machines never notice that the
firmware has DMA'ed the response.

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Andrew Gallatin <gallatin@myri.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 4a65e41..48fe624 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -361,8 +361,10 @@ myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd,
 		for (sleep_total = 0;
 		     sleep_total < 1000
 		     && response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT);
-		     sleep_total += 10)
+		     sleep_total += 10) {
 			udelay(10);
+			mb();
+		}
 	} else {
 		/* use msleep for most command */
 		for (sleep_total = 0;
-- 
cgit v0.10.2


From 99f5f87eb689c5766fa2c101fe75310a7f9ba3cd Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Fri, 9 May 2008 02:19:08 +0200
Subject: myri10ge: trivial formatting fix

Add some blank lines to uniformize the code and match
the upstream code.

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Andrew Gallatin <gallatin@myri.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 48fe624..9165a55 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -1328,7 +1328,7 @@ myri10ge_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
 	ptr = mgp->product_code_string;
 	if (ptr == NULL) {
 		printk(KERN_ERR "myri10ge: %s: Missing product code\n",
-			netdev->name);
+		       netdev->name);
 		return 0;
 	}
 	for (i = 0; i < 3; i++, ptr++) {
@@ -1362,6 +1362,7 @@ static int
 myri10ge_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal)
 {
 	struct myri10ge_priv *mgp = netdev_priv(netdev);
+
 	coal->rx_coalesce_usecs = mgp->intr_coal_delay;
 	return 0;
 }
@@ -1421,6 +1422,7 @@ myri10ge_get_ringparam(struct net_device *netdev,
 static u32 myri10ge_get_rx_csum(struct net_device *netdev)
 {
 	struct myri10ge_priv *mgp = netdev_priv(netdev);
+
 	if (mgp->csum_flag)
 		return 1;
 	else
@@ -1430,6 +1432,7 @@ static u32 myri10ge_get_rx_csum(struct net_device *netdev)
 static int myri10ge_set_rx_csum(struct net_device *netdev, u32 csum_enabled)
 {
 	struct myri10ge_priv *mgp = netdev_priv(netdev);
+
 	if (csum_enabled)
 		mgp->csum_flag = MXGEFW_FLAGS_CKSUM;
 	else
-- 
cgit v0.10.2


From eca3fd83436853483837f010d9c3fefafa46a15c Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Fri, 9 May 2008 02:19:29 +0200
Subject: myri10ge: fix potential infinite loop in enable_ecrc

Fix another potential for an infinite loop while looking for the
root port in myri10ge_enable_ecrc().

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Andrew Gallatin <gallatin@myri.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 9165a55..6526214 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -2657,13 +2657,14 @@ static void myri10ge_enable_ecrc(struct myri10ge_priv *mgp)
 	ext_type = (val & PCI_EXP_FLAGS_TYPE) >> 4;
 	if (ext_type != PCI_EXP_TYPE_ROOT_PORT) {
 		if (myri10ge_ecrc_enable > 1) {
-			struct pci_dev *old_bridge = bridge;
+			struct pci_dev *prev_bridge, *old_bridge = bridge;
 
 			/* Walk the hierarchy up to the root port
 			 * where ECRC has to be enabled */
 			do {
+				prev_bridge = bridge;
 				bridge = bridge->bus->self;
-				if (!bridge) {
+				if (!bridge || prev_bridge == bridge) {
 					dev_err(dev,
 						"Failed to find root port"
 						" to force ECRC\n");
-- 
cgit v0.10.2


From b53bef84c27e68efac9b608392acd1fc14cb6ce7 Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Fri, 9 May 2008 02:20:03 +0200
Subject: myri10ge: move data structures into a single slice

To prepare and simplify multislice rx support, add a single slice
structure and move some fields in there.
No functional change yet.

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Andrew Gallatin <gallatin@myri.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 6526214..5edcbfe 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -144,11 +144,13 @@ struct myri10ge_tx_buf {
 	char *req_bytes;
 	struct myri10ge_tx_buffer_state *info;
 	int mask;		/* number of transmit slots -1  */
-	int boundary;		/* boundary transmits cannot cross */
 	int req ____cacheline_aligned;	/* transmit slots submitted     */
 	int pkt_start;		/* packets started */
+	int stop_queue;
+	int linearized;
 	int done ____cacheline_aligned;	/* transmit slots completed     */
 	int pkt_done;		/* packets completed */
+	int wake_queue;
 };
 
 struct myri10ge_rx_done {
@@ -160,29 +162,49 @@ struct myri10ge_rx_done {
 	struct net_lro_desc lro_desc[MYRI10GE_MAX_LRO_DESCRIPTORS];
 };
 
-struct myri10ge_priv {
-	int running;		/* running?             */
-	int csum_flag;		/* rx_csums?            */
+struct myri10ge_slice_netstats {
+	unsigned long rx_packets;
+	unsigned long tx_packets;
+	unsigned long rx_bytes;
+	unsigned long tx_bytes;
+	unsigned long rx_dropped;
+	unsigned long tx_dropped;
+};
+
+struct myri10ge_slice_state {
 	struct myri10ge_tx_buf tx;	/* transmit ring        */
 	struct myri10ge_rx_buf rx_small;
 	struct myri10ge_rx_buf rx_big;
 	struct myri10ge_rx_done rx_done;
+	struct net_device *dev;
+	struct napi_struct napi;
+	struct myri10ge_priv *mgp;
+	struct myri10ge_slice_netstats stats;
+	__be32 __iomem *irq_claim;
+	struct mcp_irq_data *fw_stats;
+	dma_addr_t fw_stats_bus;
+	int watchdog_tx_done;
+	int watchdog_tx_req;
+};
+
+struct myri10ge_priv {
+	struct myri10ge_slice_state ss;
+	int tx_boundary;	/* boundary transmits cannot cross */
+	int running;		/* running?             */
+	int csum_flag;		/* rx_csums?            */
 	int small_bytes;
 	int big_bytes;
 	struct net_device *dev;
-	struct napi_struct napi;
 	struct net_device_stats stats;
+	spinlock_t stats_lock;
 	u8 __iomem *sram;
 	int sram_size;
 	unsigned long board_span;
 	unsigned long iomem_base;
-	__be32 __iomem *irq_claim;
 	__be32 __iomem *irq_deassert;
 	char *mac_addr_string;
 	struct mcp_cmd_response *cmd;
 	dma_addr_t cmd_bus;
-	struct mcp_irq_data *fw_stats;
-	dma_addr_t fw_stats_bus;
 	struct pci_dev *pdev;
 	int msi_enabled;
 	u32 link_state;
@@ -191,17 +213,12 @@ struct myri10ge_priv {
 	__be32 __iomem *intr_coal_delay_ptr;
 	int mtrr;
 	int wc_enabled;
-	int wake_queue;
-	int stop_queue;
 	int down_cnt;
 	wait_queue_head_t down_wq;
 	struct work_struct watchdog_work;
 	struct timer_list watchdog_timer;
-	int watchdog_tx_done;
-	int watchdog_tx_req;
-	int watchdog_pause;
 	int watchdog_resets;
-	int tx_linearized;
+	int watchdog_pause;
 	int pause;
 	char *fw_name;
 	char eeprom_strings[MYRI10GE_EEPROM_STRINGS_SIZE];
@@ -643,7 +660,7 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp)
 		}
 		dev_info(&mgp->pdev->dev,
 			 "Successfully adopted running firmware\n");
-		if (mgp->tx.boundary == 4096) {
+		if (mgp->tx_boundary == 4096) {
 			dev_warn(&mgp->pdev->dev,
 				 "Using firmware currently running on NIC"
 				 ".  For optimal\n");
@@ -654,7 +671,7 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp)
 		}
 
 		mgp->fw_name = "adopted";
-		mgp->tx.boundary = 2048;
+		mgp->tx_boundary = 2048;
 		return status;
 	}
 
@@ -780,7 +797,7 @@ static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
 	 * transfers took to complete.
 	 */
 
-	len = mgp->tx.boundary;
+	len = mgp->tx_boundary;
 
 	cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus);
 	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus);
@@ -842,17 +859,17 @@ static int myri10ge_reset(struct myri10ge_priv *mgp)
 
 	/* Now exchange information about interrupts  */
 
-	bytes = myri10ge_max_intr_slots * sizeof(*mgp->rx_done.entry);
-	memset(mgp->rx_done.entry, 0, bytes);
+	bytes = myri10ge_max_intr_slots * sizeof(*mgp->ss.rx_done.entry);
+	memset(mgp->ss.rx_done.entry, 0, bytes);
 	cmd.data0 = (u32) bytes;
 	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0);
-	cmd.data0 = MYRI10GE_LOWPART_TO_U32(mgp->rx_done.bus);
-	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(mgp->rx_done.bus);
+	cmd.data0 = MYRI10GE_LOWPART_TO_U32(mgp->ss.rx_done.bus);
+	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(mgp->ss.rx_done.bus);
 	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA, &cmd, 0);
 
 	status |=
 	    myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd, 0);
-	mgp->irq_claim = (__iomem __be32 *) (mgp->sram + cmd.data0);
+	mgp->ss.irq_claim = (__iomem __be32 *) (mgp->sram + cmd.data0);
 	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
 				    &cmd, 0);
 	mgp->irq_deassert = (__iomem __be32 *) (mgp->sram + cmd.data0);
@@ -866,17 +883,17 @@ static int myri10ge_reset(struct myri10ge_priv *mgp)
 	}
 	put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr);
 
-	memset(mgp->rx_done.entry, 0, bytes);
+	memset(mgp->ss.rx_done.entry, 0, bytes);
 
 	/* reset mcp/driver shared state back to 0 */
-	mgp->tx.req = 0;
-	mgp->tx.done = 0;
-	mgp->tx.pkt_start = 0;
-	mgp->tx.pkt_done = 0;
-	mgp->rx_big.cnt = 0;
-	mgp->rx_small.cnt = 0;
-	mgp->rx_done.idx = 0;
-	mgp->rx_done.cnt = 0;
+	mgp->ss.tx.req = 0;
+	mgp->ss.tx.done = 0;
+	mgp->ss.tx.pkt_start = 0;
+	mgp->ss.tx.pkt_done = 0;
+	mgp->ss.rx_big.cnt = 0;
+	mgp->ss.rx_small.cnt = 0;
+	mgp->ss.rx_done.idx = 0;
+	mgp->ss.rx_done.cnt = 0;
 	mgp->link_changes = 0;
 	status = myri10ge_update_mac_address(mgp, mgp->dev->dev_addr);
 	myri10ge_change_pause(mgp, mgp->pause);
@@ -1028,9 +1045,10 @@ myri10ge_unmap_rx_page(struct pci_dev *pdev,
 				 * page into an skb */
 
 static inline int
-myri10ge_rx_done(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
+myri10ge_rx_done(struct myri10ge_slice_state *ss, struct myri10ge_rx_buf *rx,
 		 int bytes, int len, __wsum csum)
 {
+	struct myri10ge_priv *mgp = ss->mgp;
 	struct sk_buff *skb;
 	struct skb_frag_struct rx_frags[MYRI10GE_MAX_FRAGS_PER_FRAME];
 	int i, idx, hlen, remainder;
@@ -1060,11 +1078,10 @@ myri10ge_rx_done(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
 		rx_frags[0].page_offset += MXGEFW_PAD;
 		rx_frags[0].size -= MXGEFW_PAD;
 		len -= MXGEFW_PAD;
-		lro_receive_frags(&mgp->rx_done.lro_mgr, rx_frags,
+		lro_receive_frags(&ss->rx_done.lro_mgr, rx_frags,
 				  len, len,
-				 /* opaque, will come back in get_frag_header */
-				  (void *)(__force unsigned long)csum,
-				  csum);
+				  /* opaque, will come back in get_frag_header */
+				  (void *)(__force unsigned long)csum, csum);
 		return 1;
 	}
 
@@ -1104,10 +1121,11 @@ myri10ge_rx_done(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
 	return 1;
 }
 
-static inline void myri10ge_tx_done(struct myri10ge_priv *mgp, int mcp_index)
+static inline void
+myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index)
 {
-	struct pci_dev *pdev = mgp->pdev;
-	struct myri10ge_tx_buf *tx = &mgp->tx;
+	struct pci_dev *pdev = ss->mgp->pdev;
+	struct myri10ge_tx_buf *tx = &ss->tx;
 	struct sk_buff *skb;
 	int idx, len;
 
@@ -1125,8 +1143,8 @@ static inline void myri10ge_tx_done(struct myri10ge_priv *mgp, int mcp_index)
 		len = pci_unmap_len(&tx->info[idx], len);
 		pci_unmap_len_set(&tx->info[idx], len, 0);
 		if (skb) {
-			mgp->stats.tx_bytes += skb->len;
-			mgp->stats.tx_packets++;
+			ss->stats.tx_bytes += skb->len;
+			ss->stats.tx_packets++;
 			dev_kfree_skb_irq(skb);
 			if (len)
 				pci_unmap_single(pdev,
@@ -1142,16 +1160,18 @@ static inline void myri10ge_tx_done(struct myri10ge_priv *mgp, int mcp_index)
 		}
 	}
 	/* start the queue if we've stopped it */
-	if (netif_queue_stopped(mgp->dev)
+	if (netif_queue_stopped(ss->dev)
 	    && tx->req - tx->done < (tx->mask >> 1)) {
-		mgp->wake_queue++;
-		netif_wake_queue(mgp->dev);
+		tx->wake_queue++;
+		netif_wake_queue(ss->dev);
 	}
 }
 
-static inline int myri10ge_clean_rx_done(struct myri10ge_priv *mgp, int budget)
+static inline int
+myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget)
 {
-	struct myri10ge_rx_done *rx_done = &mgp->rx_done;
+	struct myri10ge_rx_done *rx_done = &ss->rx_done;
+	struct myri10ge_priv *mgp = ss->mgp;
 	unsigned long rx_bytes = 0;
 	unsigned long rx_packets = 0;
 	unsigned long rx_ok;
@@ -1167,11 +1187,11 @@ static inline int myri10ge_clean_rx_done(struct myri10ge_priv *mgp, int budget)
 		rx_done->entry[idx].length = 0;
 		checksum = csum_unfold(rx_done->entry[idx].checksum);
 		if (length <= mgp->small_bytes)
-			rx_ok = myri10ge_rx_done(mgp, &mgp->rx_small,
+			rx_ok = myri10ge_rx_done(ss, &ss->rx_small,
 						 mgp->small_bytes,
 						 length, checksum);
 		else
-			rx_ok = myri10ge_rx_done(mgp, &mgp->rx_big,
+			rx_ok = myri10ge_rx_done(ss, &ss->rx_big,
 						 mgp->big_bytes,
 						 length, checksum);
 		rx_packets += rx_ok;
@@ -1182,25 +1202,25 @@ static inline int myri10ge_clean_rx_done(struct myri10ge_priv *mgp, int budget)
 	}
 	rx_done->idx = idx;
 	rx_done->cnt = cnt;
-	mgp->stats.rx_packets += rx_packets;
-	mgp->stats.rx_bytes += rx_bytes;
+	ss->stats.rx_packets += rx_packets;
+	ss->stats.rx_bytes += rx_bytes;
 
 	if (myri10ge_lro)
 		lro_flush_all(&rx_done->lro_mgr);
 
 	/* restock receive rings if needed */
-	if (mgp->rx_small.fill_cnt - mgp->rx_small.cnt < myri10ge_fill_thresh)
-		myri10ge_alloc_rx_pages(mgp, &mgp->rx_small,
+	if (ss->rx_small.fill_cnt - ss->rx_small.cnt < myri10ge_fill_thresh)
+		myri10ge_alloc_rx_pages(mgp, &ss->rx_small,
 					mgp->small_bytes + MXGEFW_PAD, 0);
-	if (mgp->rx_big.fill_cnt - mgp->rx_big.cnt < myri10ge_fill_thresh)
-		myri10ge_alloc_rx_pages(mgp, &mgp->rx_big, mgp->big_bytes, 0);
+	if (ss->rx_big.fill_cnt - ss->rx_big.cnt < myri10ge_fill_thresh)
+		myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0);
 
 	return work_done;
 }
 
 static inline void myri10ge_check_statblock(struct myri10ge_priv *mgp)
 {
-	struct mcp_irq_data *stats = mgp->fw_stats;
+	struct mcp_irq_data *stats = mgp->ss.fw_stats;
 
 	if (unlikely(stats->stats_updated)) {
 		unsigned link_up = ntohl(stats->link_up);
@@ -1227,9 +1247,9 @@ static inline void myri10ge_check_statblock(struct myri10ge_priv *mgp)
 			}
 		}
 		if (mgp->rdma_tags_available !=
-		    ntohl(mgp->fw_stats->rdma_tags_available)) {
+		    ntohl(stats->rdma_tags_available)) {
 			mgp->rdma_tags_available =
-			    ntohl(mgp->fw_stats->rdma_tags_available);
+			    ntohl(stats->rdma_tags_available);
 			printk(KERN_WARNING "myri10ge: %s: RDMA timed out! "
 			       "%d tags left\n", mgp->dev->name,
 			       mgp->rdma_tags_available);
@@ -1242,26 +1262,27 @@ static inline void myri10ge_check_statblock(struct myri10ge_priv *mgp)
 
 static int myri10ge_poll(struct napi_struct *napi, int budget)
 {
-	struct myri10ge_priv *mgp =
-	    container_of(napi, struct myri10ge_priv, napi);
-	struct net_device *netdev = mgp->dev;
+	struct myri10ge_slice_state *ss =
+	    container_of(napi, struct myri10ge_slice_state, napi);
+	struct net_device *netdev = ss->mgp->dev;
 	int work_done;
 
 	/* process as many rx events as NAPI will allow */
-	work_done = myri10ge_clean_rx_done(mgp, budget);
+	work_done = myri10ge_clean_rx_done(ss, budget);
 
 	if (work_done < budget) {
 		netif_rx_complete(netdev, napi);
-		put_be32(htonl(3), mgp->irq_claim);
+		put_be32(htonl(3), ss->irq_claim);
 	}
 	return work_done;
 }
 
 static irqreturn_t myri10ge_intr(int irq, void *arg)
 {
-	struct myri10ge_priv *mgp = arg;
-	struct mcp_irq_data *stats = mgp->fw_stats;
-	struct myri10ge_tx_buf *tx = &mgp->tx;
+	struct myri10ge_slice_state *ss = arg;
+	struct myri10ge_priv *mgp = ss->mgp;
+	struct mcp_irq_data *stats = ss->fw_stats;
+	struct myri10ge_tx_buf *tx = &ss->tx;
 	u32 send_done_count;
 	int i;
 
@@ -1272,7 +1293,7 @@ static irqreturn_t myri10ge_intr(int irq, void *arg)
 	/* low bit indicates receives are present, so schedule
 	 * napi poll handler */
 	if (stats->valid & 1)
-		netif_rx_schedule(mgp->dev, &mgp->napi);
+		netif_rx_schedule(ss->dev, &ss->napi);
 
 	if (!mgp->msi_enabled) {
 		put_be32(0, mgp->irq_deassert);
@@ -1289,7 +1310,7 @@ static irqreturn_t myri10ge_intr(int irq, void *arg)
 		/* check for transmit completes and receives */
 		send_done_count = ntohl(stats->send_done_count);
 		if (send_done_count != tx->pkt_done)
-			myri10ge_tx_done(mgp, (int)send_done_count);
+			myri10ge_tx_done(ss, (int)send_done_count);
 		if (unlikely(i > myri10ge_max_irq_loops)) {
 			printk(KERN_WARNING "myri10ge: %s: irq stuck?\n",
 			       mgp->dev->name);
@@ -1304,7 +1325,7 @@ static irqreturn_t myri10ge_intr(int irq, void *arg)
 
 	myri10ge_check_statblock(mgp);
 
-	put_be32(htonl(3), mgp->irq_claim + 1);
+	put_be32(htonl(3), ss->irq_claim + 1);
 	return (IRQ_HANDLED);
 }
 
@@ -1409,10 +1430,10 @@ myri10ge_get_ringparam(struct net_device *netdev,
 {
 	struct myri10ge_priv *mgp = netdev_priv(netdev);
 
-	ring->rx_mini_max_pending = mgp->rx_small.mask + 1;
-	ring->rx_max_pending = mgp->rx_big.mask + 1;
+	ring->rx_mini_max_pending = mgp->ss.rx_small.mask + 1;
+	ring->rx_max_pending = mgp->ss.rx_big.mask + 1;
 	ring->rx_jumbo_max_pending = 0;
-	ring->tx_max_pending = mgp->rx_small.mask + 1;
+	ring->tx_max_pending = mgp->ss.rx_small.mask + 1;
 	ring->rx_mini_pending = ring->rx_mini_max_pending;
 	ring->rx_pending = ring->rx_max_pending;
 	ring->rx_jumbo_pending = ring->rx_jumbo_max_pending;
@@ -1452,7 +1473,7 @@ static int myri10ge_set_tso(struct net_device *netdev, u32 tso_enabled)
 	return 0;
 }
 
-static const char myri10ge_gstrings_stats[][ETH_GSTRING_LEN] = {
+static const char myri10ge_gstrings_main_stats[][ETH_GSTRING_LEN] = {
 	"rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors",
 	"tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions",
 	"rx_length_errors", "rx_over_errors", "rx_crc_errors",
@@ -1462,28 +1483,39 @@ static const char myri10ge_gstrings_stats[][ETH_GSTRING_LEN] = {
 	/* device-specific stats */
 	"tx_boundary", "WC", "irq", "MSI",
 	"read_dma_bw_MBs", "write_dma_bw_MBs", "read_write_dma_bw_MBs",
-	"serial_number", "tx_pkt_start", "tx_pkt_done",
-	"tx_req", "tx_done", "rx_small_cnt", "rx_big_cnt",
-	"wake_queue", "stop_queue", "watchdog_resets", "tx_linearized",
+	"serial_number", "watchdog_resets",
 	"link_changes", "link_up", "dropped_link_overflow",
 	"dropped_link_error_or_filtered",
 	"dropped_pause", "dropped_bad_phy", "dropped_bad_crc32",
 	"dropped_unicast_filtered", "dropped_multicast_filtered",
 	"dropped_runt", "dropped_overrun", "dropped_no_small_buffer",
-	"dropped_no_big_buffer", "LRO aggregated", "LRO flushed",
+	"dropped_no_big_buffer"
+};
+
+static const char myri10ge_gstrings_slice_stats[][ETH_GSTRING_LEN] = {
+	"----------- slice ---------",
+	"tx_pkt_start", "tx_pkt_done", "tx_req", "tx_done",
+	"rx_small_cnt", "rx_big_cnt",
+	"wake_queue", "stop_queue", "tx_linearized", "LRO aggregated",
+	    "LRO flushed",
 	"LRO avg aggr", "LRO no_desc"
 };
 
 #define MYRI10GE_NET_STATS_LEN      21
-#define MYRI10GE_STATS_LEN	ARRAY_SIZE(myri10ge_gstrings_stats)
+#define MYRI10GE_MAIN_STATS_LEN  ARRAY_SIZE(myri10ge_gstrings_main_stats)
+#define MYRI10GE_SLICE_STATS_LEN  ARRAY_SIZE(myri10ge_gstrings_slice_stats)
 
 static void
 myri10ge_get_strings(struct net_device *netdev, u32 stringset, u8 * data)
 {
 	switch (stringset) {
 	case ETH_SS_STATS:
-		memcpy(data, *myri10ge_gstrings_stats,
-		       sizeof(myri10ge_gstrings_stats));
+		memcpy(data, *myri10ge_gstrings_main_stats,
+		       sizeof(myri10ge_gstrings_main_stats));
+		data += sizeof(myri10ge_gstrings_main_stats);
+		memcpy(data, *myri10ge_gstrings_slice_stats,
+		       sizeof(myri10ge_gstrings_slice_stats));
+		data += sizeof(myri10ge_gstrings_slice_stats);
 		break;
 	}
 }
@@ -1492,7 +1524,7 @@ static int myri10ge_get_sset_count(struct net_device *netdev, int sset)
 {
 	switch (sset) {
 	case ETH_SS_STATS:
-		return MYRI10GE_STATS_LEN;
+		return MYRI10GE_MAIN_STATS_LEN + MYRI10GE_SLICE_STATS_LEN;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -1503,12 +1535,13 @@ myri10ge_get_ethtool_stats(struct net_device *netdev,
 			   struct ethtool_stats *stats, u64 * data)
 {
 	struct myri10ge_priv *mgp = netdev_priv(netdev);
+	struct myri10ge_slice_state *ss;
 	int i;
 
 	for (i = 0; i < MYRI10GE_NET_STATS_LEN; i++)
 		data[i] = ((unsigned long *)&mgp->stats)[i];
 
-	data[i++] = (unsigned int)mgp->tx.boundary;
+	data[i++] = (unsigned int)mgp->tx_boundary;
 	data[i++] = (unsigned int)mgp->wc_enabled;
 	data[i++] = (unsigned int)mgp->pdev->irq;
 	data[i++] = (unsigned int)mgp->msi_enabled;
@@ -1516,40 +1549,44 @@ myri10ge_get_ethtool_stats(struct net_device *netdev,
 	data[i++] = (unsigned int)mgp->write_dma;
 	data[i++] = (unsigned int)mgp->read_write_dma;
 	data[i++] = (unsigned int)mgp->serial_number;
-	data[i++] = (unsigned int)mgp->tx.pkt_start;
-	data[i++] = (unsigned int)mgp->tx.pkt_done;
-	data[i++] = (unsigned int)mgp->tx.req;
-	data[i++] = (unsigned int)mgp->tx.done;
-	data[i++] = (unsigned int)mgp->rx_small.cnt;
-	data[i++] = (unsigned int)mgp->rx_big.cnt;
-	data[i++] = (unsigned int)mgp->wake_queue;
-	data[i++] = (unsigned int)mgp->stop_queue;
 	data[i++] = (unsigned int)mgp->watchdog_resets;
-	data[i++] = (unsigned int)mgp->tx_linearized;
 	data[i++] = (unsigned int)mgp->link_changes;
-	data[i++] = (unsigned int)ntohl(mgp->fw_stats->link_up);
-	data[i++] = (unsigned int)ntohl(mgp->fw_stats->dropped_link_overflow);
-	data[i++] =
-	    (unsigned int)ntohl(mgp->fw_stats->dropped_link_error_or_filtered);
-	data[i++] = (unsigned int)ntohl(mgp->fw_stats->dropped_pause);
-	data[i++] = (unsigned int)ntohl(mgp->fw_stats->dropped_bad_phy);
-	data[i++] = (unsigned int)ntohl(mgp->fw_stats->dropped_bad_crc32);
+
+	/* firmware stats are useful only in the first slice */
+	ss = &mgp->ss;
+	data[i++] = (unsigned int)ntohl(ss->fw_stats->link_up);
+	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_link_overflow);
 	data[i++] =
-	    (unsigned int)ntohl(mgp->fw_stats->dropped_unicast_filtered);
+	    (unsigned int)ntohl(ss->fw_stats->dropped_link_error_or_filtered);
+	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_pause);
+	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_bad_phy);
+	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_bad_crc32);
+	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_unicast_filtered);
 	data[i++] =
-	    (unsigned int)ntohl(mgp->fw_stats->dropped_multicast_filtered);
-	data[i++] = (unsigned int)ntohl(mgp->fw_stats->dropped_runt);
-	data[i++] = (unsigned int)ntohl(mgp->fw_stats->dropped_overrun);
-	data[i++] = (unsigned int)ntohl(mgp->fw_stats->dropped_no_small_buffer);
-	data[i++] = (unsigned int)ntohl(mgp->fw_stats->dropped_no_big_buffer);
-	data[i++] = mgp->rx_done.lro_mgr.stats.aggregated;
-	data[i++] = mgp->rx_done.lro_mgr.stats.flushed;
-	if (mgp->rx_done.lro_mgr.stats.flushed)
-		data[i++] = mgp->rx_done.lro_mgr.stats.aggregated /
-		    mgp->rx_done.lro_mgr.stats.flushed;
+	    (unsigned int)ntohl(ss->fw_stats->dropped_multicast_filtered);
+	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_runt);
+	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_overrun);
+	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_small_buffer);
+	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_big_buffer);
+
+	data[i++] = 0;
+	data[i++] = (unsigned int)ss->tx.pkt_start;
+	data[i++] = (unsigned int)ss->tx.pkt_done;
+	data[i++] = (unsigned int)ss->tx.req;
+	data[i++] = (unsigned int)ss->tx.done;
+	data[i++] = (unsigned int)ss->rx_small.cnt;
+	data[i++] = (unsigned int)ss->rx_big.cnt;
+	data[i++] = (unsigned int)ss->tx.wake_queue;
+	data[i++] = (unsigned int)ss->tx.stop_queue;
+	data[i++] = (unsigned int)ss->tx.linearized;
+	data[i++] = ss->rx_done.lro_mgr.stats.aggregated;
+	data[i++] = ss->rx_done.lro_mgr.stats.flushed;
+	if (ss->rx_done.lro_mgr.stats.flushed)
+		data[i++] = ss->rx_done.lro_mgr.stats.aggregated /
+		    ss->rx_done.lro_mgr.stats.flushed;
 	else
 		data[i++] = 0;
-	data[i++] = mgp->rx_done.lro_mgr.stats.no_desc;
+	data[i++] = ss->rx_done.lro_mgr.stats.no_desc;
 }
 
 static void myri10ge_set_msglevel(struct net_device *netdev, u32 value)
@@ -1585,19 +1622,17 @@ static const struct ethtool_ops myri10ge_ethtool_ops = {
 	.get_msglevel = myri10ge_get_msglevel
 };
 
-static int myri10ge_allocate_rings(struct net_device *dev)
+static int myri10ge_allocate_rings(struct myri10ge_slice_state *ss)
 {
-	struct myri10ge_priv *mgp;
+	struct myri10ge_priv *mgp = ss->mgp;
 	struct myri10ge_cmd cmd;
+	struct net_device *dev = mgp->dev;
 	int tx_ring_size, rx_ring_size;
 	int tx_ring_entries, rx_ring_entries;
 	int i, status;
 	size_t bytes;
 
-	mgp = netdev_priv(dev);
-
 	/* get ring sizes */
-
 	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd, 0);
 	tx_ring_size = cmd.data0;
 	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0);
@@ -1607,144 +1642,142 @@ static int myri10ge_allocate_rings(struct net_device *dev)
 
 	tx_ring_entries = tx_ring_size / sizeof(struct mcp_kreq_ether_send);
 	rx_ring_entries = rx_ring_size / sizeof(struct mcp_dma_addr);
-	mgp->tx.mask = tx_ring_entries - 1;
-	mgp->rx_small.mask = mgp->rx_big.mask = rx_ring_entries - 1;
+	ss->tx.mask = tx_ring_entries - 1;
+	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
 
 	status = -ENOMEM;
 
 	/* allocate the host shadow rings */
 
 	bytes = 8 + (MYRI10GE_MAX_SEND_DESC_TSO + 4)
-	    * sizeof(*mgp->tx.req_list);
-	mgp->tx.req_bytes = kzalloc(bytes, GFP_KERNEL);
-	if (mgp->tx.req_bytes == NULL)
+	    * sizeof(*ss->tx.req_list);
+	ss->tx.req_bytes = kzalloc(bytes, GFP_KERNEL);
+	if (ss->tx.req_bytes == NULL)
 		goto abort_with_nothing;
 
 	/* ensure req_list entries are aligned to 8 bytes */
-	mgp->tx.req_list = (struct mcp_kreq_ether_send *)
-	    ALIGN((unsigned long)mgp->tx.req_bytes, 8);
+	ss->tx.req_list = (struct mcp_kreq_ether_send *)
+	    ALIGN((unsigned long)ss->tx.req_bytes, 8);
 
-	bytes = rx_ring_entries * sizeof(*mgp->rx_small.shadow);
-	mgp->rx_small.shadow = kzalloc(bytes, GFP_KERNEL);
-	if (mgp->rx_small.shadow == NULL)
+	bytes = rx_ring_entries * sizeof(*ss->rx_small.shadow);
+	ss->rx_small.shadow = kzalloc(bytes, GFP_KERNEL);
+	if (ss->rx_small.shadow == NULL)
 		goto abort_with_tx_req_bytes;
 
-	bytes = rx_ring_entries * sizeof(*mgp->rx_big.shadow);
-	mgp->rx_big.shadow = kzalloc(bytes, GFP_KERNEL);
-	if (mgp->rx_big.shadow == NULL)
+	bytes = rx_ring_entries * sizeof(*ss->rx_big.shadow);
+	ss->rx_big.shadow = kzalloc(bytes, GFP_KERNEL);
+	if (ss->rx_big.shadow == NULL)
 		goto abort_with_rx_small_shadow;
 
 	/* allocate the host info rings */
 
-	bytes = tx_ring_entries * sizeof(*mgp->tx.info);
-	mgp->tx.info = kzalloc(bytes, GFP_KERNEL);
-	if (mgp->tx.info == NULL)
+	bytes = tx_ring_entries * sizeof(*ss->tx.info);
+	ss->tx.info = kzalloc(bytes, GFP_KERNEL);
+	if (ss->tx.info == NULL)
 		goto abort_with_rx_big_shadow;
 
-	bytes = rx_ring_entries * sizeof(*mgp->rx_small.info);
-	mgp->rx_small.info = kzalloc(bytes, GFP_KERNEL);
-	if (mgp->rx_small.info == NULL)
+	bytes = rx_ring_entries * sizeof(*ss->rx_small.info);
+	ss->rx_small.info = kzalloc(bytes, GFP_KERNEL);
+	if (ss->rx_small.info == NULL)
 		goto abort_with_tx_info;
 
-	bytes = rx_ring_entries * sizeof(*mgp->rx_big.info);
-	mgp->rx_big.info = kzalloc(bytes, GFP_KERNEL);
-	if (mgp->rx_big.info == NULL)
+	bytes = rx_ring_entries * sizeof(*ss->rx_big.info);
+	ss->rx_big.info = kzalloc(bytes, GFP_KERNEL);
+	if (ss->rx_big.info == NULL)
 		goto abort_with_rx_small_info;
 
 	/* Fill the receive rings */
-	mgp->rx_big.cnt = 0;
-	mgp->rx_small.cnt = 0;
-	mgp->rx_big.fill_cnt = 0;
-	mgp->rx_small.fill_cnt = 0;
-	mgp->rx_small.page_offset = MYRI10GE_ALLOC_SIZE;
-	mgp->rx_big.page_offset = MYRI10GE_ALLOC_SIZE;
-	mgp->rx_small.watchdog_needed = 0;
-	mgp->rx_big.watchdog_needed = 0;
-	myri10ge_alloc_rx_pages(mgp, &mgp->rx_small,
+	ss->rx_big.cnt = 0;
+	ss->rx_small.cnt = 0;
+	ss->rx_big.fill_cnt = 0;
+	ss->rx_small.fill_cnt = 0;
+	ss->rx_small.page_offset = MYRI10GE_ALLOC_SIZE;
+	ss->rx_big.page_offset = MYRI10GE_ALLOC_SIZE;
+	ss->rx_small.watchdog_needed = 0;
+	ss->rx_big.watchdog_needed = 0;
+	myri10ge_alloc_rx_pages(mgp, &ss->rx_small,
 				mgp->small_bytes + MXGEFW_PAD, 0);
 
-	if (mgp->rx_small.fill_cnt < mgp->rx_small.mask + 1) {
+	if (ss->rx_small.fill_cnt < ss->rx_small.mask + 1) {
 		printk(KERN_ERR "myri10ge: %s: alloced only %d small bufs\n",
-		       dev->name, mgp->rx_small.fill_cnt);
+		       dev->name, ss->rx_small.fill_cnt);
 		goto abort_with_rx_small_ring;
 	}
 
-	myri10ge_alloc_rx_pages(mgp, &mgp->rx_big, mgp->big_bytes, 0);
-	if (mgp->rx_big.fill_cnt < mgp->rx_big.mask + 1) {
+	myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0);
+	if (ss->rx_big.fill_cnt < ss->rx_big.mask + 1) {
 		printk(KERN_ERR "myri10ge: %s: alloced only %d big bufs\n",
-		       dev->name, mgp->rx_big.fill_cnt);
+		       dev->name, ss->rx_big.fill_cnt);
 		goto abort_with_rx_big_ring;
 	}
 
 	return 0;
 
 abort_with_rx_big_ring:
-	for (i = mgp->rx_big.cnt; i < mgp->rx_big.fill_cnt; i++) {
-		int idx = i & mgp->rx_big.mask;
-		myri10ge_unmap_rx_page(mgp->pdev, &mgp->rx_big.info[idx],
+	for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) {
+		int idx = i & ss->rx_big.mask;
+		myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx],
 				       mgp->big_bytes);
-		put_page(mgp->rx_big.info[idx].page);
+		put_page(ss->rx_big.info[idx].page);
 	}
 
 abort_with_rx_small_ring:
-	for (i = mgp->rx_small.cnt; i < mgp->rx_small.fill_cnt; i++) {
-		int idx = i & mgp->rx_small.mask;
-		myri10ge_unmap_rx_page(mgp->pdev, &mgp->rx_small.info[idx],
+	for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) {
+		int idx = i & ss->rx_small.mask;
+		myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx],
 				       mgp->small_bytes + MXGEFW_PAD);
-		put_page(mgp->rx_small.info[idx].page);
+		put_page(ss->rx_small.info[idx].page);
 	}
 
-	kfree(mgp->rx_big.info);
+	kfree(ss->rx_big.info);
 
 abort_with_rx_small_info:
-	kfree(mgp->rx_small.info);
+	kfree(ss->rx_small.info);
 
 abort_with_tx_info:
-	kfree(mgp->tx.info);
+	kfree(ss->tx.info);
 
 abort_with_rx_big_shadow:
-	kfree(mgp->rx_big.shadow);
+	kfree(ss->rx_big.shadow);
 
 abort_with_rx_small_shadow:
-	kfree(mgp->rx_small.shadow);
+	kfree(ss->rx_small.shadow);
 
 abort_with_tx_req_bytes:
-	kfree(mgp->tx.req_bytes);
-	mgp->tx.req_bytes = NULL;
-	mgp->tx.req_list = NULL;
+	kfree(ss->tx.req_bytes);
+	ss->tx.req_bytes = NULL;
+	ss->tx.req_list = NULL;
 
 abort_with_nothing:
 	return status;
 }
 
-static void myri10ge_free_rings(struct net_device *dev)
+static void myri10ge_free_rings(struct myri10ge_slice_state *ss)
 {
-	struct myri10ge_priv *mgp;
+	struct myri10ge_priv *mgp = ss->mgp;
 	struct sk_buff *skb;
 	struct myri10ge_tx_buf *tx;
 	int i, len, idx;
 
-	mgp = netdev_priv(dev);
-
-	for (i = mgp->rx_big.cnt; i < mgp->rx_big.fill_cnt; i++) {
-		idx = i & mgp->rx_big.mask;
-		if (i == mgp->rx_big.fill_cnt - 1)
-			mgp->rx_big.info[idx].page_offset = MYRI10GE_ALLOC_SIZE;
-		myri10ge_unmap_rx_page(mgp->pdev, &mgp->rx_big.info[idx],
+	for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) {
+		idx = i & ss->rx_big.mask;
+		if (i == ss->rx_big.fill_cnt - 1)
+			ss->rx_big.info[idx].page_offset = MYRI10GE_ALLOC_SIZE;
+		myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx],
 				       mgp->big_bytes);
-		put_page(mgp->rx_big.info[idx].page);
+		put_page(ss->rx_big.info[idx].page);
 	}
 
-	for (i = mgp->rx_small.cnt; i < mgp->rx_small.fill_cnt; i++) {
-		idx = i & mgp->rx_small.mask;
-		if (i == mgp->rx_small.fill_cnt - 1)
-			mgp->rx_small.info[idx].page_offset =
+	for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) {
+		idx = i & ss->rx_small.mask;
+		if (i == ss->rx_small.fill_cnt - 1)
+			ss->rx_small.info[idx].page_offset =
 			    MYRI10GE_ALLOC_SIZE;
-		myri10ge_unmap_rx_page(mgp->pdev, &mgp->rx_small.info[idx],
+		myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx],
 				       mgp->small_bytes + MXGEFW_PAD);
-		put_page(mgp->rx_small.info[idx].page);
+		put_page(ss->rx_small.info[idx].page);
 	}
-	tx = &mgp->tx;
+	tx = &ss->tx;
 	while (tx->done != tx->req) {
 		idx = tx->done & tx->mask;
 		skb = tx->info[idx].skb;
@@ -1755,7 +1788,7 @@ static void myri10ge_free_rings(struct net_device *dev)
 		len = pci_unmap_len(&tx->info[idx], len);
 		pci_unmap_len_set(&tx->info[idx], len, 0);
 		if (skb) {
-			mgp->stats.tx_dropped++;
+			ss->stats.tx_dropped++;
 			dev_kfree_skb_any(skb);
 			if (len)
 				pci_unmap_single(mgp->pdev,
@@ -1770,19 +1803,19 @@ static void myri10ge_free_rings(struct net_device *dev)
 					       PCI_DMA_TODEVICE);
 		}
 	}
-	kfree(mgp->rx_big.info);
+	kfree(ss->rx_big.info);
 
-	kfree(mgp->rx_small.info);
+	kfree(ss->rx_small.info);
 
-	kfree(mgp->tx.info);
+	kfree(ss->tx.info);
 
-	kfree(mgp->rx_big.shadow);
+	kfree(ss->rx_big.shadow);
 
-	kfree(mgp->rx_small.shadow);
+	kfree(ss->rx_small.shadow);
 
-	kfree(mgp->tx.req_bytes);
-	mgp->tx.req_bytes = NULL;
-	mgp->tx.req_list = NULL;
+	kfree(ss->tx.req_bytes);
+	ss->tx.req_bytes = NULL;
+	ss->tx.req_list = NULL;
 }
 
 static int myri10ge_request_irq(struct myri10ge_priv *mgp)
@@ -1881,13 +1914,11 @@ myri10ge_get_frag_header(struct skb_frag_struct *frag, void **mac_hdr,
 
 static int myri10ge_open(struct net_device *dev)
 {
-	struct myri10ge_priv *mgp;
+	struct myri10ge_priv *mgp = netdev_priv(dev);
 	struct myri10ge_cmd cmd;
 	struct net_lro_mgr *lro_mgr;
 	int status, big_pow2;
 
-	mgp = netdev_priv(dev);
-
 	if (mgp->running != MYRI10GE_ETH_STOPPED)
 		return -EBUSY;
 
@@ -1924,16 +1955,16 @@ static int myri10ge_open(struct net_device *dev)
 	/* get the lanai pointers to the send and receive rings */
 
 	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd, 0);
-	mgp->tx.lanai =
+	mgp->ss.tx.lanai =
 	    (struct mcp_kreq_ether_send __iomem *)(mgp->sram + cmd.data0);
 
 	status |=
 	    myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd, 0);
-	mgp->rx_small.lanai =
+	mgp->ss.rx_small.lanai =
 	    (struct mcp_kreq_ether_recv __iomem *)(mgp->sram + cmd.data0);
 
 	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd, 0);
-	mgp->rx_big.lanai =
+	mgp->ss.rx_big.lanai =
 	    (struct mcp_kreq_ether_recv __iomem *)(mgp->sram + cmd.data0);
 
 	if (status != 0) {
@@ -1945,15 +1976,15 @@ static int myri10ge_open(struct net_device *dev)
 	}
 
 	if (myri10ge_wcfifo && mgp->wc_enabled) {
-		mgp->tx.wc_fifo = (u8 __iomem *) mgp->sram + MXGEFW_ETH_SEND_4;
-		mgp->rx_small.wc_fifo =
+		mgp->ss.tx.wc_fifo = (u8 __iomem *) mgp->sram + MXGEFW_ETH_SEND_4;
+		mgp->ss.rx_small.wc_fifo =
 		    (u8 __iomem *) mgp->sram + MXGEFW_ETH_RECV_SMALL;
-		mgp->rx_big.wc_fifo =
+		mgp->ss.rx_big.wc_fifo =
 		    (u8 __iomem *) mgp->sram + MXGEFW_ETH_RECV_BIG;
 	} else {
-		mgp->tx.wc_fifo = NULL;
-		mgp->rx_small.wc_fifo = NULL;
-		mgp->rx_big.wc_fifo = NULL;
+		mgp->ss.tx.wc_fifo = NULL;
+		mgp->ss.rx_small.wc_fifo = NULL;
+		mgp->ss.rx_big.wc_fifo = NULL;
 	}
 
 	/* Firmware needs the big buff size as a power of 2.  Lie and
@@ -1970,7 +2001,7 @@ static int myri10ge_open(struct net_device *dev)
 		mgp->big_bytes = big_pow2;
 	}
 
-	status = myri10ge_allocate_rings(dev);
+	status = myri10ge_allocate_rings(&mgp->ss);
 	if (status != 0)
 		goto abort_with_irq;
 
@@ -1989,12 +2020,12 @@ static int myri10ge_open(struct net_device *dev)
 		goto abort_with_rings;
 	}
 
-	cmd.data0 = MYRI10GE_LOWPART_TO_U32(mgp->fw_stats_bus);
-	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(mgp->fw_stats_bus);
+	cmd.data0 = MYRI10GE_LOWPART_TO_U32(mgp->ss.fw_stats_bus);
+	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(mgp->ss.fw_stats_bus);
 	cmd.data2 = sizeof(struct mcp_irq_data);
 	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0);
 	if (status == -ENOSYS) {
-		dma_addr_t bus = mgp->fw_stats_bus;
+		dma_addr_t bus = mgp->ss.fw_stats_bus;
 		bus += offsetof(struct mcp_irq_data, send_done_count);
 		cmd.data0 = MYRI10GE_LOWPART_TO_U32(bus);
 		cmd.data1 = MYRI10GE_HIGHPART_TO_U32(bus);
@@ -2015,20 +2046,20 @@ static int myri10ge_open(struct net_device *dev)
 	mgp->link_state = ~0U;
 	mgp->rdma_tags_available = 15;
 
-	lro_mgr = &mgp->rx_done.lro_mgr;
+	lro_mgr = &mgp->ss.rx_done.lro_mgr;
 	lro_mgr->dev = dev;
 	lro_mgr->features = LRO_F_NAPI;
 	lro_mgr->ip_summed = CHECKSUM_COMPLETE;
 	lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY;
 	lro_mgr->max_desc = MYRI10GE_MAX_LRO_DESCRIPTORS;
-	lro_mgr->lro_arr = mgp->rx_done.lro_desc;
+	lro_mgr->lro_arr = mgp->ss.rx_done.lro_desc;
 	lro_mgr->get_frag_header = myri10ge_get_frag_header;
 	lro_mgr->max_aggr = myri10ge_lro_max_pkts;
 	lro_mgr->frag_align_pad = 2;
 	if (lro_mgr->max_aggr > MAX_SKB_FRAGS)
 		lro_mgr->max_aggr = MAX_SKB_FRAGS;
 
-	napi_enable(&mgp->napi);	/* must happen prior to any irq */
+	napi_enable(&mgp->ss.napi);	/* must happen prior to any irq */
 
 	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd, 0);
 	if (status) {
@@ -2037,8 +2068,8 @@ static int myri10ge_open(struct net_device *dev)
 		goto abort_with_rings;
 	}
 
-	mgp->wake_queue = 0;
-	mgp->stop_queue = 0;
+	mgp->ss.tx.wake_queue = 0;
+	mgp->ss.tx.stop_queue = 0;
 	mgp->running = MYRI10GE_ETH_RUNNING;
 	mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ;
 	add_timer(&mgp->watchdog_timer);
@@ -2046,7 +2077,7 @@ static int myri10ge_open(struct net_device *dev)
 	return 0;
 
 abort_with_rings:
-	myri10ge_free_rings(dev);
+	myri10ge_free_rings(&mgp->ss);
 
 abort_with_irq:
 	myri10ge_free_irq(mgp);
@@ -2058,21 +2089,19 @@ abort_with_nothing:
 
 static int myri10ge_close(struct net_device *dev)
 {
-	struct myri10ge_priv *mgp;
+	struct myri10ge_priv *mgp = netdev_priv(dev);
 	struct myri10ge_cmd cmd;
 	int status, old_down_cnt;
 
-	mgp = netdev_priv(dev);
-
 	if (mgp->running != MYRI10GE_ETH_RUNNING)
 		return 0;
 
-	if (mgp->tx.req_bytes == NULL)
+	if (mgp->ss.tx.req_bytes == NULL)
 		return 0;
 
 	del_timer_sync(&mgp->watchdog_timer);
 	mgp->running = MYRI10GE_ETH_STOPPING;
-	napi_disable(&mgp->napi);
+	napi_disable(&mgp->ss.napi);
 	netif_carrier_off(dev);
 	netif_stop_queue(dev);
 	old_down_cnt = mgp->down_cnt;
@@ -2088,7 +2117,7 @@ static int myri10ge_close(struct net_device *dev)
 
 	netif_tx_disable(dev);
 	myri10ge_free_irq(mgp);
-	myri10ge_free_rings(dev);
+	myri10ge_free_rings(&mgp->ss);
 
 	mgp->running = MYRI10GE_ETH_STOPPED;
 	return 0;
@@ -2184,7 +2213,7 @@ myri10ge_submit_req_wc(struct myri10ge_tx_buf *tx,
 
 /*
  * Transmit a packet.  We need to split the packet so that a single
- * segment does not cross myri10ge->tx.boundary, so this makes segment
+ * segment does not cross myri10ge->tx_boundary, so this makes segment
  * counting tricky.  So rather than try to count segments up front, we
  * just give up if there are too few segments to hold a reasonably
  * fragmented packet currently available.  If we run
@@ -2195,8 +2224,9 @@ myri10ge_submit_req_wc(struct myri10ge_tx_buf *tx,
 static int myri10ge_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct myri10ge_priv *mgp = netdev_priv(dev);
+	struct myri10ge_slice_state *ss;
 	struct mcp_kreq_ether_send *req;
-	struct myri10ge_tx_buf *tx = &mgp->tx;
+	struct myri10ge_tx_buf *tx;
 	struct skb_frag_struct *frag;
 	dma_addr_t bus;
 	u32 low;
@@ -2207,6 +2237,9 @@ static int myri10ge_xmit(struct sk_buff *skb, struct net_device *dev)
 	int cum_len, seglen, boundary, rdma_count;
 	u8 flags, odd_flag;
 
+	/* always transmit through slot 0 */
+	ss = &mgp->ss;
+	tx = &ss->tx;
 again:
 	req = tx->req_list;
 	avail = tx->mask - 1 - (tx->req - tx->done);
@@ -2221,7 +2254,7 @@ again:
 
 	if ((unlikely(avail < max_segments))) {
 		/* we are out of transmit resources */
-		mgp->stop_queue++;
+		tx->stop_queue++;
 		netif_stop_queue(dev);
 		return 1;
 	}
@@ -2283,7 +2316,7 @@ again:
 			if (skb_padto(skb, ETH_ZLEN)) {
 				/* The packet is gone, so we must
 				 * return 0 */
-				mgp->stats.tx_dropped += 1;
+				ss->stats.tx_dropped += 1;
 				return 0;
 			}
 			/* adjust the len to account for the zero pad
@@ -2325,7 +2358,7 @@ again:
 
 	while (1) {
 		/* Break the SKB or Fragment up into pieces which
-		 * do not cross mgp->tx.boundary */
+		 * do not cross mgp->tx_boundary */
 		low = MYRI10GE_LOWPART_TO_U32(bus);
 		high_swapped = htonl(MYRI10GE_HIGHPART_TO_U32(bus));
 		while (len) {
@@ -2335,7 +2368,8 @@ again:
 			if (unlikely(count == max_segments))
 				goto abort_linearize;
 
-			boundary = (low + tx->boundary) & ~(tx->boundary - 1);
+			boundary =
+			    (low + mgp->tx_boundary) & ~(mgp->tx_boundary - 1);
 			seglen = boundary - low;
 			if (seglen > len)
 				seglen = len;
@@ -2419,7 +2453,7 @@ again:
 		myri10ge_submit_req_wc(tx, tx->req_list, count);
 	tx->pkt_start++;
 	if ((avail - count) < MXGEFW_MAX_SEND_DESC) {
-		mgp->stop_queue++;
+		tx->stop_queue++;
 		netif_stop_queue(dev);
 	}
 	dev->trans_start = jiffies;
@@ -2461,12 +2495,12 @@ abort_linearize:
 	if (skb_linearize(skb))
 		goto drop;
 
-	mgp->tx_linearized++;
+	tx->linearized++;
 	goto again;
 
 drop:
 	dev_kfree_skb_any(skb);
-	mgp->stats.tx_dropped += 1;
+	ss->stats.tx_dropped += 1;
 	return 0;
 
 }
@@ -2474,7 +2508,7 @@ drop:
 static int myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev)
 {
 	struct sk_buff *segs, *curr;
-	struct myri10ge_priv *mgp = dev->priv;
+	struct myri10ge_priv *mgp = netdev_priv(dev);
 	int status;
 
 	segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO6);
@@ -2514,14 +2548,13 @@ static struct net_device_stats *myri10ge_get_stats(struct net_device *dev)
 
 static void myri10ge_set_multicast_list(struct net_device *dev)
 {
+	struct myri10ge_priv *mgp = netdev_priv(dev);
 	struct myri10ge_cmd cmd;
-	struct myri10ge_priv *mgp;
 	struct dev_mc_list *mc_list;
 	__be32 data[2] = { 0, 0 };
 	int err;
 	DECLARE_MAC_BUF(mac);
 
-	mgp = netdev_priv(dev);
 	/* can be called from atomic contexts,
 	 * pass 1 to force atomicity in myri10ge_send_cmd() */
 	myri10ge_change_promisc(mgp, dev->flags & IFF_PROMISC, 1);
@@ -2723,9 +2756,9 @@ static void myri10ge_enable_ecrc(struct myri10ge_priv *mgp)
  * already been enabled, then it must use a firmware image which works
  * around unaligned completion packets (myri10ge_ethp_z8e.dat), and it
  * should also ensure that it never gives the device a Read-DMA which is
- * larger than 2KB by setting the tx.boundary to 2KB.  If ECRC is
+ * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
  * enabled, then the driver should use the aligned (myri10ge_eth_z8e.dat)
- * firmware image, and set tx.boundary to 4KB.
+ * firmware image, and set tx_boundary to 4KB.
  */
 
 static void myri10ge_firmware_probe(struct myri10ge_priv *mgp)
@@ -2734,7 +2767,7 @@ static void myri10ge_firmware_probe(struct myri10ge_priv *mgp)
 	struct device *dev = &pdev->dev;
 	int status;
 
-	mgp->tx.boundary = 4096;
+	mgp->tx_boundary = 4096;
 	/*
 	 * Verify the max read request size was set to 4KB
 	 * before trying the test with 4KB.
@@ -2746,7 +2779,7 @@ static void myri10ge_firmware_probe(struct myri10ge_priv *mgp)
 	}
 	if (status != 4096) {
 		dev_warn(dev, "Max Read Request size != 4096 (%d)\n", status);
-		mgp->tx.boundary = 2048;
+		mgp->tx_boundary = 2048;
 	}
 	/*
 	 * load the optimized firmware (which assumes aligned PCIe
@@ -2779,7 +2812,7 @@ static void myri10ge_firmware_probe(struct myri10ge_priv *mgp)
 			 "Please install up to date fw\n");
 abort:
 	/* fall back to using the unaligned firmware */
-	mgp->tx.boundary = 2048;
+	mgp->tx_boundary = 2048;
 	mgp->fw_name = myri10ge_fw_unaligned;
 
 }
@@ -2800,7 +2833,7 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp)
 		if (link_width < 8) {
 			dev_info(&mgp->pdev->dev, "PCIE x%d Link\n",
 				 link_width);
-			mgp->tx.boundary = 4096;
+			mgp->tx_boundary = 4096;
 			mgp->fw_name = myri10ge_fw_aligned;
 		} else {
 			myri10ge_firmware_probe(mgp);
@@ -2809,12 +2842,12 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp)
 		if (myri10ge_force_firmware == 1) {
 			dev_info(&mgp->pdev->dev,
 				 "Assuming aligned completions (forced)\n");
-			mgp->tx.boundary = 4096;
+			mgp->tx_boundary = 4096;
 			mgp->fw_name = myri10ge_fw_aligned;
 		} else {
 			dev_info(&mgp->pdev->dev,
 				 "Assuming unaligned completions (forced)\n");
-			mgp->tx.boundary = 2048;
+			mgp->tx_boundary = 2048;
 			mgp->fw_name = myri10ge_fw_unaligned;
 		}
 	}
@@ -2931,6 +2964,7 @@ static void myri10ge_watchdog(struct work_struct *work)
 {
 	struct myri10ge_priv *mgp =
 	    container_of(work, struct myri10ge_priv, watchdog_work);
+	struct myri10ge_tx_buf *tx;
 	u32 reboot;
 	int status;
 	u16 cmd, vendor;
@@ -2980,15 +3014,16 @@ static void myri10ge_watchdog(struct work_struct *work)
 
 		printk(KERN_ERR "myri10ge: %s: device timeout, resetting\n",
 		       mgp->dev->name);
+		tx = &mgp->ss.tx;
 		printk(KERN_INFO "myri10ge: %s: %d %d %d %d %d\n",
-		       mgp->dev->name, mgp->tx.req, mgp->tx.done,
-		       mgp->tx.pkt_start, mgp->tx.pkt_done,
-		       (int)ntohl(mgp->fw_stats->send_done_count));
+		       mgp->dev->name, tx->req, tx->done,
+		       tx->pkt_start, tx->pkt_done,
+		       (int)ntohl(mgp->ss.fw_stats->send_done_count));
 		msleep(2000);
 		printk(KERN_INFO "myri10ge: %s: %d %d %d %d %d\n",
-		       mgp->dev->name, mgp->tx.req, mgp->tx.done,
-		       mgp->tx.pkt_start, mgp->tx.pkt_done,
-		       (int)ntohl(mgp->fw_stats->send_done_count));
+		       mgp->dev->name, tx->req, tx->done,
+		       tx->pkt_start, tx->pkt_done,
+		       (int)ntohl(mgp->ss.fw_stats->send_done_count));
 	}
 	rtnl_lock();
 	myri10ge_close(mgp->dev);
@@ -3011,28 +3046,31 @@ static void myri10ge_watchdog(struct work_struct *work)
 static void myri10ge_watchdog_timer(unsigned long arg)
 {
 	struct myri10ge_priv *mgp;
+	struct myri10ge_slice_state *ss;
 	u32 rx_pause_cnt;
 
 	mgp = (struct myri10ge_priv *)arg;
 
-	if (mgp->rx_small.watchdog_needed) {
-		myri10ge_alloc_rx_pages(mgp, &mgp->rx_small,
+	rx_pause_cnt = ntohl(mgp->ss.fw_stats->dropped_pause);
+
+	ss = &mgp->ss;
+	if (ss->rx_small.watchdog_needed) {
+		myri10ge_alloc_rx_pages(mgp, &ss->rx_small,
 					mgp->small_bytes + MXGEFW_PAD, 1);
-		if (mgp->rx_small.fill_cnt - mgp->rx_small.cnt >=
+		if (ss->rx_small.fill_cnt - ss->rx_small.cnt >=
 		    myri10ge_fill_thresh)
-			mgp->rx_small.watchdog_needed = 0;
+			ss->rx_small.watchdog_needed = 0;
 	}
-	if (mgp->rx_big.watchdog_needed) {
-		myri10ge_alloc_rx_pages(mgp, &mgp->rx_big, mgp->big_bytes, 1);
-		if (mgp->rx_big.fill_cnt - mgp->rx_big.cnt >=
+	if (ss->rx_big.watchdog_needed) {
+		myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 1);
+		if (ss->rx_big.fill_cnt - ss->rx_big.cnt >=
 		    myri10ge_fill_thresh)
-			mgp->rx_big.watchdog_needed = 0;
+			ss->rx_big.watchdog_needed = 0;
 	}
-	rx_pause_cnt = ntohl(mgp->fw_stats->dropped_pause);
 
-	if (mgp->tx.req != mgp->tx.done &&
-	    mgp->tx.done == mgp->watchdog_tx_done &&
-	    mgp->watchdog_tx_req != mgp->watchdog_tx_done) {
+	if (ss->tx.req != ss->tx.done &&
+	    ss->tx.done == ss->watchdog_tx_done &&
+	    ss->watchdog_tx_req != ss->watchdog_tx_done) {
 		/* nic seems like it might be stuck.. */
 		if (rx_pause_cnt != mgp->watchdog_pause) {
 			if (net_ratelimit())
@@ -3047,8 +3085,8 @@ static void myri10ge_watchdog_timer(unsigned long arg)
 	/* rearm timer */
 	mod_timer(&mgp->watchdog_timer,
 		  jiffies + myri10ge_watchdog_timeout * HZ);
-	mgp->watchdog_tx_done = mgp->tx.done;
-	mgp->watchdog_tx_req = mgp->tx.req;
+	ss->watchdog_tx_done = ss->tx.done;
+	ss->watchdog_tx_req = ss->tx.req;
 	mgp->watchdog_pause = rx_pause_cnt;
 }
 
@@ -3072,7 +3110,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	mgp = netdev_priv(netdev);
 	mgp->dev = netdev;
-	netif_napi_add(netdev, &mgp->napi, myri10ge_poll, myri10ge_napi_weight);
+	netif_napi_add(netdev, &mgp->ss.napi, myri10ge_poll, myri10ge_napi_weight);
 	mgp->pdev = pdev;
 	mgp->csum_flag = MXGEFW_FLAGS_CKSUM;
 	mgp->pause = myri10ge_flow_control;
@@ -3118,9 +3156,9 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (mgp->cmd == NULL)
 		goto abort_with_netdev;
 
-	mgp->fw_stats = dma_alloc_coherent(&pdev->dev, sizeof(*mgp->fw_stats),
-					   &mgp->fw_stats_bus, GFP_KERNEL);
-	if (mgp->fw_stats == NULL)
+	mgp->ss.fw_stats = dma_alloc_coherent(&pdev->dev, sizeof(*mgp->ss.fw_stats),
+					   &mgp->ss.fw_stats_bus, GFP_KERNEL);
+	if (mgp->ss.fw_stats == NULL)
 		goto abort_with_cmd;
 
 	mgp->board_span = pci_resource_len(pdev, 0);
@@ -3160,12 +3198,12 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		netdev->dev_addr[i] = mgp->mac_addr[i];
 
 	/* allocate rx done ring */
-	bytes = myri10ge_max_intr_slots * sizeof(*mgp->rx_done.entry);
-	mgp->rx_done.entry = dma_alloc_coherent(&pdev->dev, bytes,
-						&mgp->rx_done.bus, GFP_KERNEL);
-	if (mgp->rx_done.entry == NULL)
+	bytes = myri10ge_max_intr_slots * sizeof(*mgp->ss.rx_done.entry);
+	mgp->ss.rx_done.entry = dma_alloc_coherent(&pdev->dev, bytes,
+						&mgp->ss.rx_done.bus, GFP_KERNEL);
+	if (mgp->ss.rx_done.entry == NULL)
 		goto abort_with_ioremap;
-	memset(mgp->rx_done.entry, 0, bytes);
+	memset(mgp->ss.rx_done.entry, 0, bytes);
 
 	myri10ge_select_firmware(mgp);
 
@@ -3225,7 +3263,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 	dev_info(dev, "%s IRQ %d, tx bndry %d, fw %s, WC %s\n",
 		 (mgp->msi_enabled ? "MSI" : "xPIC"),
-		 netdev->irq, mgp->tx.boundary, mgp->fw_name,
+		 netdev->irq, mgp->tx_boundary, mgp->fw_name,
 		 (mgp->wc_enabled ? "Enabled" : "Disabled"));
 
 	return 0;
@@ -3237,9 +3275,9 @@ abort_with_firmware:
 	myri10ge_dummy_rdma(mgp, 0);
 
 abort_with_rx_done:
-	bytes = myri10ge_max_intr_slots * sizeof(*mgp->rx_done.entry);
+	bytes = myri10ge_max_intr_slots * sizeof(*mgp->ss.rx_done.entry);
 	dma_free_coherent(&pdev->dev, bytes,
-			  mgp->rx_done.entry, mgp->rx_done.bus);
+			  mgp->ss.rx_done.entry, mgp->ss.rx_done.bus);
 
 abort_with_ioremap:
 	iounmap(mgp->sram);
@@ -3249,8 +3287,8 @@ abort_with_wc:
 	if (mgp->mtrr >= 0)
 		mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span);
 #endif
-	dma_free_coherent(&pdev->dev, sizeof(*mgp->fw_stats),
-			  mgp->fw_stats, mgp->fw_stats_bus);
+	dma_free_coherent(&pdev->dev, sizeof(*mgp->ss.fw_stats),
+			  mgp->ss.fw_stats, mgp->ss.fw_stats_bus);
 
 abort_with_cmd:
 	dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd),
@@ -3288,9 +3326,9 @@ static void myri10ge_remove(struct pci_dev *pdev)
 	/* avoid a memory leak */
 	pci_restore_state(pdev);
 
-	bytes = myri10ge_max_intr_slots * sizeof(*mgp->rx_done.entry);
+	bytes = myri10ge_max_intr_slots * sizeof(*mgp->ss.rx_done.entry);
 	dma_free_coherent(&pdev->dev, bytes,
-			  mgp->rx_done.entry, mgp->rx_done.bus);
+			  mgp->ss.rx_done.entry, mgp->ss.rx_done.bus);
 
 	iounmap(mgp->sram);
 
@@ -3298,8 +3336,8 @@ static void myri10ge_remove(struct pci_dev *pdev)
 	if (mgp->mtrr >= 0)
 		mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span);
 #endif
-	dma_free_coherent(&pdev->dev, sizeof(*mgp->fw_stats),
-			  mgp->fw_stats, mgp->fw_stats_bus);
+	dma_free_coherent(&pdev->dev, sizeof(*mgp->ss.fw_stats),
+			  mgp->ss.fw_stats, mgp->ss.fw_stats_bus);
 
 	dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd),
 			  mgp->cmd, mgp->cmd_bus);
-- 
cgit v0.10.2


From fa0a90d96b08856203435b051dd1c155b58ccd0f Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Fri, 9 May 2008 02:20:25 +0200
Subject: myri10ge: cleanup retrieving of firmware capabilities

Add myri10ge_get_firmware_capabilities() to retrieve TSO6 and
interrupt slots capabilities from the firmware.

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Andrew Gallatin <gallatin@myri.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 5edcbfe..054168f 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -194,6 +194,7 @@ struct myri10ge_priv {
 	int csum_flag;		/* rx_csums?            */
 	int small_bytes;
 	int big_bytes;
+	int max_intr_slots;
 	struct net_device *dev;
 	struct net_device_stats stats;
 	spinlock_t stats_lock;
@@ -634,13 +635,38 @@ static int myri10ge_adopt_running_firmware(struct myri10ge_priv *mgp)
 	return status;
 }
 
+static int myri10ge_get_firmware_capabilities(struct myri10ge_priv *mgp)
+{
+	struct myri10ge_cmd cmd;
+	int status;
+
+	/* probe for IPv6 TSO support */
+	mgp->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO;
+	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE,
+				   &cmd, 0);
+	if (status == 0) {
+		mgp->max_tso6 = cmd.data0;
+		mgp->features |= NETIF_F_TSO6;
+	}
+
+	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0);
+	if (status != 0) {
+		dev_err(&mgp->pdev->dev,
+			"failed MXGEFW_CMD_GET_RX_RING_SIZE\n");
+		return -ENXIO;
+	}
+
+	/* one interrupt slot per rx ring entry, small and big rings combined */
+	mgp->max_intr_slots = 2 * (cmd.data0 / sizeof(struct mcp_dma_addr));
+	return 0;
+}
+
 static int myri10ge_load_firmware(struct myri10ge_priv *mgp)
 {
 	char __iomem *submit;
 	__be32 buf[16] __attribute__ ((__aligned__(8)));
 	u32 dma_low, dma_high, size;
 	int status, i;
-	struct myri10ge_cmd cmd;
 
 	size = 0;
 	status = myri10ge_load_hotplug_firmware(mgp, &size);
@@ -672,6 +698,8 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp)
 
 		mgp->fw_name = "adopted";
 		mgp->tx_boundary = 2048;
+		myri10ge_dummy_rdma(mgp, 1);
+		status = myri10ge_get_firmware_capabilities(mgp);
 		return status;
 	}
 
@@ -714,18 +742,10 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp)
 		dev_err(&mgp->pdev->dev, "handoff failed\n");
 		return -ENXIO;
 	}
-	dev_info(&mgp->pdev->dev, "handoff confirmed\n");
 	myri10ge_dummy_rdma(mgp, 1);
+	status = myri10ge_get_firmware_capabilities(mgp);
 
-	/* probe for IPv6 TSO support */
-	mgp->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO;
-	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE,
-				   &cmd, 0);
-	if (status == 0) {
-		mgp->max_tso6 = cmd.data0;
-		mgp->features |= NETIF_F_TSO6;
-	}
-	return 0;
+	return status;
 }
 
 static int myri10ge_update_mac_address(struct myri10ge_priv *mgp, u8 * addr)
-- 
cgit v0.10.2


From 014377a1df693ff30a9e8b69f0bbb0a38e601f75 Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Fri, 9 May 2008 02:20:47 +0200
Subject: myri10ge: fix the number of interrupt slots

Fix a long-standing bug/misunderstanding between the
driver and the firmware.  The size of the interrupt
queue must be set to the number of rx slots (big + small),
and it should never have been a tunable.
Setting it too small results in chaos.

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Andrew Gallatin <gallatin@myri.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 054168f..c91b12e 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -253,10 +253,6 @@ static int myri10ge_ecrc_enable = 1;
 module_param(myri10ge_ecrc_enable, int, S_IRUGO);
 MODULE_PARM_DESC(myri10ge_ecrc_enable, "Enable Extended CRC on PCI-E");
 
-static int myri10ge_max_intr_slots = 1024;
-module_param(myri10ge_max_intr_slots, int, S_IRUGO);
-MODULE_PARM_DESC(myri10ge_max_intr_slots, "Interrupt queue slots");
-
 static int myri10ge_small_bytes = -1;	/* -1 == auto */
 module_param(myri10ge_small_bytes, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(myri10ge_small_bytes, "Threshold of small packets");
@@ -879,7 +875,7 @@ static int myri10ge_reset(struct myri10ge_priv *mgp)
 
 	/* Now exchange information about interrupts  */
 
-	bytes = myri10ge_max_intr_slots * sizeof(*mgp->ss.rx_done.entry);
+	bytes = mgp->max_intr_slots * sizeof(*mgp->ss.rx_done.entry);
 	memset(mgp->ss.rx_done.entry, 0, bytes);
 	cmd.data0 = (u32) bytes;
 	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0);
@@ -1217,7 +1213,7 @@ myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget)
 		rx_packets += rx_ok;
 		rx_bytes += rx_ok * (unsigned long)length;
 		cnt++;
-		idx = cnt & (myri10ge_max_intr_slots - 1);
+		idx = cnt & (mgp->max_intr_slots - 1);
 		work_done++;
 	}
 	rx_done->idx = idx;
@@ -3218,7 +3214,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		netdev->dev_addr[i] = mgp->mac_addr[i];
 
 	/* allocate rx done ring */
-	bytes = myri10ge_max_intr_slots * sizeof(*mgp->ss.rx_done.entry);
+	bytes = mgp->max_intr_slots * sizeof(*mgp->ss.rx_done.entry);
 	mgp->ss.rx_done.entry = dma_alloc_coherent(&pdev->dev, bytes,
 						&mgp->ss.rx_done.bus, GFP_KERNEL);
 	if (mgp->ss.rx_done.entry == NULL)
@@ -3295,7 +3291,7 @@ abort_with_firmware:
 	myri10ge_dummy_rdma(mgp, 0);
 
 abort_with_rx_done:
-	bytes = myri10ge_max_intr_slots * sizeof(*mgp->ss.rx_done.entry);
+	bytes = mgp->max_intr_slots * sizeof(*mgp->ss.rx_done.entry);
 	dma_free_coherent(&pdev->dev, bytes,
 			  mgp->ss.rx_done.entry, mgp->ss.rx_done.bus);
 
@@ -3346,7 +3342,7 @@ static void myri10ge_remove(struct pci_dev *pdev)
 	/* avoid a memory leak */
 	pci_restore_state(pdev);
 
-	bytes = myri10ge_max_intr_slots * sizeof(*mgp->ss.rx_done.entry);
+	bytes = mgp->max_intr_slots * sizeof(*mgp->ss.rx_done.entry);
 	dma_free_coherent(&pdev->dev, bytes,
 			  mgp->ss.rx_done.entry, mgp->ss.rx_done.bus);
 
-- 
cgit v0.10.2


From 48c4b6dbb7e246957e13302668acf7c77e4f8b3a Mon Sep 17 00:00:00 2001
From: Divy Le Ray <divy@chelsio.com>
Date: Tue, 6 May 2008 19:25:56 -0700
Subject: cxgb3 - fix port up/down error path

Fix failure paths when ports are stopped and restarted
in EEH recovery.

Signed-off-by: Divy Le Ray <divy@chelsio.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index 4fdb13f..acebe43 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -71,6 +71,7 @@ enum {				/* adapter flags */
 	USING_MSIX = (1 << 2),
 	QUEUES_BOUND = (1 << 3),
 	TP_PARITY_INIT = (1 << 4),
+	NAPI_INIT = (1 << 5),
 };
 
 struct fl_pg_chunk {
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index ce949d5..d67fc10 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -421,6 +421,13 @@ static void init_napi(struct adapter *adap)
 			netif_napi_add(qs->netdev, &qs->napi, qs->napi.poll,
 				       64);
 	}
+
+	/*
+	 * netif_napi_add() can be called only once per napi_struct because it
+	 * adds each new napi_struct to a list.  Be careful not to call it a
+	 * second time, e.g., during EEH recovery, by making a note of it.
+	 */
+	adap->flags |= NAPI_INIT;
 }
 
 /*
@@ -896,7 +903,8 @@ static int cxgb_up(struct adapter *adap)
 			goto out;
 
 		setup_rss(adap);
-		init_napi(adap);
+		if (!(adap->flags & NAPI_INIT))
+			init_napi(adap);
 		adap->flags |= FULL_INIT_DONE;
 	}
 
@@ -999,7 +1007,7 @@ static int offload_open(struct net_device *dev)
 		return 0;
 
 	if (!adap_up && (err = cxgb_up(adapter)) < 0)
-		return err;
+		goto out;
 
 	t3_tp_set_offload_mode(adapter, 1);
 	tdev->lldev = adapter->port[0];
@@ -1061,10 +1069,8 @@ static int cxgb_open(struct net_device *dev)
 	int other_ports = adapter->open_device_map & PORT_MASK;
 	int err;
 
-	if (!adapter->open_device_map && (err = cxgb_up(adapter)) < 0) {
-		quiesce_rx(adapter);
+	if (!adapter->open_device_map && (err = cxgb_up(adapter)) < 0)
 		return err;
-	}
 
 	set_bit(pi->port_id, &adapter->open_device_map);
 	if (is_offload(adapter) && !ofld_disable) {
@@ -2431,7 +2437,7 @@ static pci_ers_result_t t3_io_error_detected(struct pci_dev *pdev,
 
 	pci_disable_device(pdev);
 
-	/* Request a slot slot reset. */
+	/* Request a slot reset. */
 	return PCI_ERS_RESULT_NEED_RESET;
 }
 
@@ -2448,13 +2454,16 @@ static pci_ers_result_t t3_io_slot_reset(struct pci_dev *pdev)
 	if (pci_enable_device(pdev)) {
 		dev_err(&pdev->dev,
 			"Cannot re-enable PCI device after reset.\n");
-		return PCI_ERS_RESULT_DISCONNECT;
+		goto err;
 	}
 	pci_set_master(pdev);
 
-	t3_prep_adapter(adapter, adapter->params.info, 1);
+	if (t3_prep_adapter(adapter, adapter->params.info, 1))
+		goto err;
 
 	return PCI_ERS_RESULT_RECOVERED;
+err:
+	return PCI_ERS_RESULT_DISCONNECT;
 }
 
 /**
@@ -2483,13 +2492,6 @@ static void t3_io_resume(struct pci_dev *pdev)
 			netif_device_attach(netdev);
 		}
 	}
-
-	if (is_offload(adapter)) {
-		__set_bit(OFFLOAD_DEVMAP_BIT, &adapter->registered_device_map);
-		if (offload_open(adapter->port[0]))
-			printk(KERN_WARNING
-			       "Could not bring back offload capabilities\n");
-	}
 }
 
 static struct pci_error_handlers t3_err_handler = {
-- 
cgit v0.10.2


From 204e2f98c2d13f869b8541f3c57c7314f75cab11 Mon Sep 17 00:00:00 2001
From: Divy Le Ray <divy@chelsio.com>
Date: Tue, 6 May 2008 19:26:01 -0700
Subject: cxgb3 - fix EEH

Reset the chip when the PCI link goes down.
Preserve the napi structure when a sge qset's resources are freed.
Replay only HW initialization when the chip comes out of reset.

Signed-off-by: Divy Le ray <divy@chelsio.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/cxgb3/common.h b/drivers/net/cxgb3/common.h
index 91ee727..579bee4 100644
--- a/drivers/net/cxgb3/common.h
+++ b/drivers/net/cxgb3/common.h
@@ -698,6 +698,7 @@ void mac_prep(struct cmac *mac, struct adapter *adapter, int index);
 void early_hw_init(struct adapter *adapter, const struct adapter_info *ai);
 int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai,
 		    int reset);
+int t3_replay_prep_adapter(struct adapter *adapter);
 void t3_led_ready(struct adapter *adapter);
 void t3_fatal_err(struct adapter *adapter);
 void t3_set_vlan_accel(struct adapter *adapter, unsigned int ports, int on);
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index d67fc10..3a31272 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -2430,9 +2430,6 @@ static pci_ers_result_t t3_io_error_detected(struct pci_dev *pdev,
 	    test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map))
 		offload_close(&adapter->tdev);
 
-	/* Free sge resources */
-	t3_free_sge_resources(adapter);
-
 	adapter->flags &= ~FULL_INIT_DONE;
 
 	pci_disable_device(pdev);
@@ -2457,8 +2454,12 @@ static pci_ers_result_t t3_io_slot_reset(struct pci_dev *pdev)
 		goto err;
 	}
 	pci_set_master(pdev);
+	pci_restore_state(pdev);
+
+	/* Free sge resources */
+	t3_free_sge_resources(adapter);
 
-	if (t3_prep_adapter(adapter, adapter->params.info, 1))
+	if (t3_replay_prep_adapter(adapter))
 		goto err;
 
 	return PCI_ERS_RESULT_RECOVERED;
@@ -2610,6 +2611,7 @@ static int __devinit init_one(struct pci_dev *pdev,
 	}
 
 	pci_set_master(pdev);
+	pci_save_state(pdev);
 
 	mmio_start = pci_resource_start(pdev, 0);
 	mmio_len = pci_resource_len(pdev, 0);
diff --git a/drivers/net/cxgb3/regs.h b/drivers/net/cxgb3/regs.h
index 02dbbb3..5671788 100644
--- a/drivers/net/cxgb3/regs.h
+++ b/drivers/net/cxgb3/regs.h
@@ -444,6 +444,14 @@
 
 #define A_PCIE_CFG 0x88
 
+#define S_ENABLELINKDWNDRST    21
+#define V_ENABLELINKDWNDRST(x) ((x) << S_ENABLELINKDWNDRST)
+#define F_ENABLELINKDWNDRST    V_ENABLELINKDWNDRST(1U)
+
+#define S_ENABLELINKDOWNRST    20
+#define V_ENABLELINKDOWNRST(x) ((x) << S_ENABLELINKDOWNRST)
+#define F_ENABLELINKDOWNRST    V_ENABLELINKDOWNRST(1U)
+
 #define S_PCIE_CLIDECEN    16
 #define V_PCIE_CLIDECEN(x) ((x) << S_PCIE_CLIDECEN)
 #define F_PCIE_CLIDECEN    V_PCIE_CLIDECEN(1U)
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 98a6bbd..796eb30 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -539,6 +539,31 @@ static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
 }
 
 /**
+ *	t3_reset_qset - reset a sge qset
+ *	@q: the queue set
+ *
+ *	Zero the whole qset if its adapter has not set NAPI_INIT yet.
+ *	Otherwise clear only the queue state and keep the NAPI structure
+ *	for the qset's reincarnation, for example during EEH recovery.
+ */
+static void t3_reset_qset(struct sge_qset *q)
+{
+	if (q->adap &&
+	    !(q->adap->flags & NAPI_INIT)) {
+		memset(q, 0, sizeof(*q));
+		return;
+	}
+
+	q->adap = NULL;
+	memset(&q->rspq, 0, sizeof(q->rspq));
+	memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
+	memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
+	q->txq_stopped = 0;
+	memset(&q->tx_reclaim_timer, 0, sizeof(q->tx_reclaim_timer));
+}
+
+
+/**
  *	free_qset - free the resources of an SGE queue set
  *	@adapter: the adapter owning the queue set
  *	@q: the queue set
@@ -594,7 +619,7 @@ static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
 				  q->rspq.desc, q->rspq.phys_addr);
 	}
 
-	memset(q, 0, sizeof(*q));
+	t3_reset_qset(q);
 }
 
 /**
@@ -1365,7 +1390,7 @@ static void restart_ctrlq(unsigned long data)
  */
 int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
 {
-	int ret; 
+	int ret;
 	local_bh_disable();
 	ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
 	local_bh_enable();
diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c
index a99496a..d405a93 100644
--- a/drivers/net/cxgb3/t3_hw.c
+++ b/drivers/net/cxgb3/t3_hw.c
@@ -3264,6 +3264,7 @@ static void config_pcie(struct adapter *adap)
 
 	t3_write_reg(adap, A_PCIE_PEX_ERR, 0xffffffff);
 	t3_set_reg_field(adap, A_PCIE_CFG, 0,
+			 F_ENABLELINKDWNDRST | F_ENABLELINKDOWNRST |
 			 F_PCIE_DMASTOPEN | F_PCIE_CLIDECEN);
 }
 
@@ -3655,3 +3656,30 @@ void t3_led_ready(struct adapter *adapter)
 	t3_set_reg_field(adapter, A_T3DBG_GPIO_EN, F_GPIO0_OUT_VAL,
 			 F_GPIO0_OUT_VAL);
 }
+
+/* Replay only the HW initialization once the chip is out of reset (EEH) */
+int t3_replay_prep_adapter(struct adapter *adapter)
+{
+	const struct adapter_info *ai = adapter->params.info;
+	unsigned int i, j = 0;
+	int ret;
+
+	early_hw_init(adapter, ai);
+	ret = init_parity(adapter);
+	if (ret)
+		return ret;
+
+	for_each_port(adapter, i) {
+		struct port_info *p = adap2pinfo(adapter, i);
+		while (!adapter->params.vpd.port_type[j])
+			++j;	/* skip VPD entries with port_type 0 */
+
+		p->port_type->phy_prep(&p->phy, adapter, ai->phy_base_addr + j,
+					ai->mdio_ops);
+
+		p->phy.ops->power_down(&p->phy, 1);
+		++j;
+	}
+
+	return 0;
+}
-- 
cgit v0.10.2


From ad5da7ab7be0a510ae69d533edf573d1ca6eec4b Mon Sep 17 00:00:00 2001
From: Andy Fleming <afleming@freescale.com>
Date: Wed, 7 May 2008 13:20:55 -0500
Subject: gianfar: Fix a bug where the pointer never moves for dma_unmap...

The loop that unmaps all of the TX Buffer Descriptors never actually
moves the txbd pointer, so we were just repeatedly unmapping the first one.

Signed-off-by: Andy Fleming <afleming@freescale.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 6f22f06..25bdd08 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -635,6 +635,8 @@ static void free_skb_resources(struct gfar_private *priv)
 			dev_kfree_skb_any(priv->tx_skbuff[i]);
 			priv->tx_skbuff[i] = NULL;
 		}
+
+		txbdp++;
 	}
 
 	kfree(priv->tx_skbuff);
-- 
cgit v0.10.2


From 3c82c30cd5963a4523a6ec5f32fc2d20a5bb672a Mon Sep 17 00:00:00 2001
From: Hannes Hering <hannes.hering@linux.vnet.ibm.com>
Date: Wed, 7 May 2008 14:43:01 +0200
Subject: memory: Introduce exports for memory notifiers

This patch introduces two exports to allow modules to use memory notifiers.

Signed-off-by: Hannes Hering <hering2@de.ibm.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 8ce6de5..937e825 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -53,11 +53,13 @@ int register_memory_notifier(struct notifier_block *nb)
 {
         return blocking_notifier_chain_register(&memory_chain, nb);
 }
+EXPORT_SYMBOL(register_memory_notifier);
 
 void unregister_memory_notifier(struct notifier_block *nb)
 {
         blocking_notifier_chain_unregister(&memory_chain, nb);
 }
+EXPORT_SYMBOL(unregister_memory_notifier);
 
 /*
  * register_memory - Setup a sysfs device for a memory block
-- 
cgit v0.10.2


From fb7b6ca2b6b7c23b52be143bdd5f55a23b9780c8 Mon Sep 17 00:00:00 2001
From: Hannes Hering <hannes.hering@linux.vnet.ibm.com>
Date: Wed, 7 May 2008 14:43:20 +0200
Subject: ehea: Add dependency to Kconfig

The new ehea memory hot plug implementation depends on MEMORY_HOTPLUG.

Signed-off-by: Hannes Hering <hering2@de.ibm.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index d27f54a..9f6cc8a 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2426,7 +2426,7 @@ config CHELSIO_T3
 
 config EHEA
 	tristate "eHEA Ethernet support"
-	depends on IBMEBUS && INET && SPARSEMEM
+	depends on IBMEBUS && INET && SPARSEMEM && MEMORY_HOTPLUG
 	select INET_LRO
 	---help---
 	  This driver supports the IBM pSeries eHEA ethernet adapter.
-- 
cgit v0.10.2


From 48cfb14f8b89d4d5b3df6c16f08b258686fb12ad Mon Sep 17 00:00:00 2001
From: Hannes Hering <hannes.hering@linux.vnet.ibm.com>
Date: Wed, 7 May 2008 14:43:36 +0200
Subject: ehea: Add DLPAR memory remove support

The eHEA driver uses the recently modified walk_memory_resource for powerpc
functionality to detect the memory layout. It further uses the memory hotplug
notifiers to catch memory hotplug events.

Signed-off-by: Hannes Hering <hering2@de.ibm.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h
index f5dacce..fe872fb 100644
--- a/drivers/net/ehea/ehea.h
+++ b/drivers/net/ehea/ehea.h
@@ -40,7 +40,7 @@
 #include <asm/io.h>
 
 #define DRV_NAME	"ehea"
-#define DRV_VERSION	"EHEA_0090"
+#define DRV_VERSION	"EHEA_0091"
 
 /* eHEA capability flags */
 #define DLPAR_PORT_ADD_REM 1
@@ -118,6 +118,13 @@
 #define EHEA_MR_ACC_CTRL       0x00800000
 
 #define EHEA_BUSMAP_START      0x8000000000000000ULL
+#define EHEA_INVAL_ADDR        0xFFFFFFFFFFFFFFFFULL
+#define EHEA_DIR_INDEX_SHIFT 13                   /* 8k Entries in 64k block */
+#define EHEA_TOP_INDEX_SHIFT (EHEA_DIR_INDEX_SHIFT * 2)
+#define EHEA_MAP_ENTRIES (1 << EHEA_DIR_INDEX_SHIFT)
+#define EHEA_MAP_SIZE (0x10000)                   /* currently fixed map size */
+#define EHEA_INDEX_MASK (EHEA_MAP_ENTRIES - 1)
+
 
 #define EHEA_WATCH_DOG_TIMEOUT 10*HZ
 
@@ -192,10 +199,20 @@ struct h_epas {
 				   set to 0 if unused */
 };
 
-struct ehea_busmap {
-	unsigned int entries;		/* total number of entries */
-	unsigned int valid_sections;	/* number of valid sections */
-	u64 *vaddr;
+/*
+ * Memory map data structures
+ */
+struct ehea_dir_bmap
+{
+	u64 ent[EHEA_MAP_ENTRIES];
+};
+struct ehea_top_bmap
+{
+	struct ehea_dir_bmap *dir[EHEA_MAP_ENTRIES];
+};
+struct ehea_bmap
+{
+	struct ehea_top_bmap *top[EHEA_MAP_ENTRIES];
 };
 
 struct ehea_qp;
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index f9bc21c..d1b6d4e 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -35,6 +35,7 @@
 #include <linux/if_ether.h>
 #include <linux/notifier.h>
 #include <linux/reboot.h>
+#include <linux/memory.h>
 #include <asm/kexec.h>
 #include <linux/mutex.h>
 
@@ -3503,6 +3504,24 @@ void ehea_crash_handler(void)
 					      0, H_DEREG_BCMC);
 }
 
+static int ehea_mem_notifier(struct notifier_block *nb,
+                             unsigned long action, void *data)
+{
+	switch (action) {
+	case MEM_OFFLINE:
+		ehea_info("memory has been removed");
+		ehea_rereg_mrs(NULL);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block ehea_mem_nb = {
+	.notifier_call = ehea_mem_notifier,
+};
+
 static int ehea_reboot_notifier(struct notifier_block *nb,
 				unsigned long action, void *unused)
 {
@@ -3581,6 +3600,10 @@ int __init ehea_module_init(void)
 	if (ret)
 		ehea_info("failed registering reboot notifier");
 
+	ret = register_memory_notifier(&ehea_mem_nb);
+	if (ret)
+		ehea_info("failed registering memory remove notifier");
+
 	ret = crash_shutdown_register(&ehea_crash_handler);
 	if (ret)
 		ehea_info("failed registering crash handler");
@@ -3604,6 +3627,7 @@ int __init ehea_module_init(void)
 out3:
 	ibmebus_unregister_driver(&ehea_driver);
 out2:
+	unregister_memory_notifier(&ehea_mem_nb);
 	unregister_reboot_notifier(&ehea_reboot_nb);
 	crash_shutdown_unregister(&ehea_crash_handler);
 out:
@@ -3621,6 +3645,7 @@ static void __exit ehea_module_exit(void)
 	ret = crash_shutdown_unregister(&ehea_crash_handler);
 	if (ret)
 		ehea_info("failed unregistering crash handler");
+	unregister_memory_notifier(&ehea_mem_nb);
 	kfree(ehea_fw_handles.arr);
 	kfree(ehea_bcmc_regs.arr);
 	ehea_destroy_busmap();
diff --git a/drivers/net/ehea/ehea_qmr.c b/drivers/net/ehea/ehea_qmr.c
index d522e90..140f05b 100644
--- a/drivers/net/ehea/ehea_qmr.c
+++ b/drivers/net/ehea/ehea_qmr.c
@@ -31,8 +31,8 @@
 #include "ehea_phyp.h"
 #include "ehea_qmr.h"
 
+struct ehea_bmap *ehea_bmap = NULL;
 
-struct ehea_busmap ehea_bmap = { 0, 0, NULL };
 
 
 static void *hw_qpageit_get_inc(struct hw_queue *queue)
@@ -559,125 +559,253 @@ int ehea_destroy_qp(struct ehea_qp *qp)
 	return 0;
 }
 
-int ehea_create_busmap(void)
+static inline int ehea_calc_index(unsigned long i, unsigned long s)
 {
-	u64 vaddr = EHEA_BUSMAP_START;
-	unsigned long high_section_index = 0;
-	int i;
+	return (i >> s) & EHEA_INDEX_MASK;
+}
 
-	/*
-	 * Sections are not in ascending order -> Loop over all sections and
-	 * find the highest PFN to compute the required map size.
-	*/
-	ehea_bmap.valid_sections = 0;
+static inline int ehea_init_top_bmap(struct ehea_top_bmap *ehea_top_bmap,
+				     int dir)
+{
+	if(!ehea_top_bmap->dir[dir]) {
+		ehea_top_bmap->dir[dir] =
+			kzalloc(sizeof(struct ehea_dir_bmap), GFP_KERNEL);
+		if (!ehea_top_bmap->dir[dir])
+			return -ENOMEM;
+	}
+	return 0;
+}
 
-	for (i = 0; i < NR_MEM_SECTIONS; i++)
-		if (valid_section_nr(i))
-			high_section_index = i;
+static inline int ehea_init_bmap(struct ehea_bmap *ehea_bmap, int top, int dir)
+{
+	if(!ehea_bmap->top[top]) {
+		ehea_bmap->top[top] =
+			kzalloc(sizeof(struct ehea_top_bmap), GFP_KERNEL);
+		if (!ehea_bmap->top[top])
+			return -ENOMEM;
+	}
+	return ehea_init_top_bmap(ehea_bmap->top[top], dir);
+}
 
-	ehea_bmap.entries = high_section_index + 1;
-	ehea_bmap.vaddr = vmalloc(ehea_bmap.entries * sizeof(*ehea_bmap.vaddr));
+static int ehea_create_busmap_callback(unsigned long pfn,
+				       unsigned long nr_pages, void *arg)
+{
+	unsigned long i, mr_len, start_section, end_section;
+	start_section = (pfn * PAGE_SIZE) / EHEA_SECTSIZE;
+	end_section = start_section + ((nr_pages * PAGE_SIZE) / EHEA_SECTSIZE);
+	mr_len = *(unsigned long *)arg;
 
-	if (!ehea_bmap.vaddr)
+	if (!ehea_bmap)		/* first memory range: create the map */
+		ehea_bmap = kzalloc(sizeof(struct ehea_bmap), GFP_KERNEL);
+	if (!ehea_bmap)
 		return -ENOMEM;
 
-	for (i = 0 ; i < ehea_bmap.entries; i++) {
-		unsigned long pfn = section_nr_to_pfn(i);
+	for (i = start_section; i < end_section; i++) {
+		int ret, top, dir, idx;
+		u64 vaddr;
+
+		top = ehea_calc_index(i, EHEA_TOP_INDEX_SHIFT);
+		dir = ehea_calc_index(i, EHEA_DIR_INDEX_SHIFT);
+		ret = ehea_init_bmap(ehea_bmap, top, dir);
+		if (ret)
+			return ret;
 
-		if (pfn_valid(pfn)) {
-			ehea_bmap.vaddr[i] = vaddr;
-			vaddr += EHEA_SECTSIZE;
-			ehea_bmap.valid_sections++;
-		} else
-			ehea_bmap.vaddr[i] = 0;
+		idx = i & EHEA_INDEX_MASK;
+		/* mr_len bytes are mapped already; add offset in this range */
+		vaddr = EHEA_BUSMAP_START + mr_len +
+			(i - start_section) * EHEA_SECTSIZE;
+		ehea_bmap->top[top]->dir[dir]->ent[idx] = vaddr;
 	}
 
+	mr_len += nr_pages * PAGE_SIZE;
+	*(unsigned long *)arg = mr_len;
+
 	return 0;
 }
 
+static unsigned long ehea_mr_len;
+
+static DEFINE_MUTEX(ehea_busmap_mutex);
+
+int ehea_create_busmap(void)
+{
+	int ret;
+	mutex_lock(&ehea_busmap_mutex);
+	ehea_mr_len = 0;
+	ret = walk_memory_resource(0, 1ULL << MAX_PHYSMEM_BITS, &ehea_mr_len,
+				   ehea_create_busmap_callback);
+	mutex_unlock(&ehea_busmap_mutex);
+	return ret;
+}
+
 void ehea_destroy_busmap(void)
 {
-	vfree(ehea_bmap.vaddr);
+	int top, dir;
+	mutex_lock(&ehea_busmap_mutex);
+	if (!ehea_bmap)
+		goto out_destroy;
+
+	for (top = 0; top < EHEA_MAP_ENTRIES; top++) {
+		if (!ehea_bmap->top[top])
+			continue;
+
+		for (dir = 0; dir < EHEA_MAP_ENTRIES; dir++) {
+			if (!ehea_bmap->top[top]->dir[dir])
+				continue;
+
+			kfree(ehea_bmap->top[top]->dir[dir]);
+		}
+
+		kfree(ehea_bmap->top[top]);
+	}
+
+	kfree(ehea_bmap);
+	ehea_bmap = NULL;
+out_destroy:	
+	mutex_unlock(&ehea_busmap_mutex);
 }
 
 u64 ehea_map_vaddr(void *caddr)
 {
-	u64 mapped_addr;
-	unsigned long index = __pa(caddr) >> SECTION_SIZE_BITS;
-
-	if (likely(index < ehea_bmap.entries)) {
-		mapped_addr = ehea_bmap.vaddr[index];
-		if (likely(mapped_addr))
-			mapped_addr |= (((unsigned long)caddr)
-					& (EHEA_SECTSIZE - 1));
-		else
-			mapped_addr = -1;
-	} else
-		mapped_addr = -1;
-
-	if (unlikely(mapped_addr == -1))
-		if (!test_and_set_bit(__EHEA_STOP_XFER, &ehea_driver_flags))
-			schedule_work(&ehea_rereg_mr_task);
-
-	return mapped_addr;
+	int top, dir, idx;
+	unsigned long index, offset;
+
+	if (!ehea_bmap)
+		return EHEA_INVAL_ADDR;
+
+	index = virt_to_abs(caddr) >> SECTION_SIZE_BITS;
+	top = (index >> EHEA_TOP_INDEX_SHIFT) & EHEA_INDEX_MASK;
+	if (!ehea_bmap->top[top])
+		return EHEA_INVAL_ADDR;
+
+	dir = (index >> EHEA_DIR_INDEX_SHIFT) & EHEA_INDEX_MASK;
+	if (!ehea_bmap->top[top]->dir[dir])
+		return EHEA_INVAL_ADDR;
+
+	idx = index & EHEA_INDEX_MASK;
+	if (!ehea_bmap->top[top]->dir[dir]->ent[idx])
+		return EHEA_INVAL_ADDR;
+
+	offset = (unsigned long)caddr & (EHEA_SECTSIZE - 1);
+	return ehea_bmap->top[top]->dir[dir]->ent[idx] | offset;
+}
+
+static inline void *ehea_calc_sectbase(int top, int dir, int idx)
+{
+	/* Widen before shifting: top << 26 does not fit in an int. */
+	unsigned long ret = idx | ((unsigned long)dir << EHEA_DIR_INDEX_SHIFT);
+	ret |= (unsigned long)top << EHEA_TOP_INDEX_SHIFT;
+	return abs_to_virt(ret << SECTION_SIZE_BITS);
+}
+
+static u64 ehea_reg_mr_section(int top, int dir, int idx, u64 *pt,
+			       struct ehea_adapter *adapter,
+			       struct ehea_mr *mr)
+{
+	void *pg;
+	u64 j, m, hret = H_SUCCESS;
+	unsigned long k = 0;
+	u64 pt_abs = virt_to_abs(pt);
+
+	void *sectbase = ehea_calc_sectbase(top, dir, idx);
+
+	for (j = 0; j < (EHEA_PAGES_PER_SECTION / EHEA_MAX_RPAGE); j++) {
+		/* Fill pt with the next EHEA_MAX_RPAGE page addresses */
+		for (m = 0; m < EHEA_MAX_RPAGE; m++) {
+			pg = sectbase + ((k++) * EHEA_PAGESIZE);
+			pt[m] = virt_to_abs(pg);
+		}
+		hret = ehea_h_register_rpage_mr(adapter->handle, mr->handle, 0,
+						0, pt_abs, EHEA_MAX_RPAGE);
+
+		if ((hret != H_SUCCESS)
+		    && (hret != H_PAGE_REGISTERED)) {
+			/* The MR is not freed here: ehea_reg_kernel_mr()
+			 * frees it when it sees the failing hret. */
+			ehea_error("register_rpage_mr failed");
+			return hret;
+		}
+	}
+	return hret;
+}
+
+static u64 ehea_reg_mr_sections(int top, int dir, u64 *pt,
+				struct ehea_adapter *adapter,
+				struct ehea_mr *mr)
+{
+	u64 hret = H_SUCCESS;
+	int idx;
+
+	for (idx = 0; idx < EHEA_MAP_ENTRIES; idx++) {
+		if (!ehea_bmap->top[top]->dir[dir]->ent[idx])
+			continue;	/* section has no busmap entry */
+
+		hret = ehea_reg_mr_section(top, dir, idx, pt, adapter, mr);
+		if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
+			return hret;
+	}
+	return hret;
+}
+
+static u64 ehea_reg_mr_dir_sections(int top, u64 *pt,
+				    struct ehea_adapter *adapter,
+				    struct ehea_mr *mr)
+{
+	u64 hret = H_SUCCESS;
+	int dir;
+
+	for (dir = 0; dir < EHEA_MAP_ENTRIES; dir++) {
+		if (!ehea_bmap->top[top]->dir[dir])
+			continue;	/* no directory allocated here */
+
+		hret = ehea_reg_mr_sections(top, dir, pt, adapter, mr);
+		if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
+			return hret;
+	}
+	return hret;
 }
 
 int ehea_reg_kernel_mr(struct ehea_adapter *adapter, struct ehea_mr *mr)
 {
 	int ret;
 	u64 *pt;
-	void *pg;
-	u64 hret, pt_abs, i, j, m, mr_len;
+	u64 hret;
 	u32 acc_ctrl = EHEA_MR_ACC_CTRL;
 
-	mr_len = ehea_bmap.valid_sections * EHEA_SECTSIZE;
+	unsigned long top;
 
-	pt =  kzalloc(PAGE_SIZE, GFP_KERNEL);
+	pt = kzalloc(PAGE_SIZE, GFP_KERNEL);
 	if (!pt) {
 		ehea_error("no mem");
 		ret = -ENOMEM;
 		goto out;
 	}
-	pt_abs = virt_to_abs(pt);
 
-	hret = ehea_h_alloc_resource_mr(adapter->handle,
-					EHEA_BUSMAP_START, mr_len,
-					acc_ctrl, adapter->pd,
+	hret = ehea_h_alloc_resource_mr(adapter->handle, EHEA_BUSMAP_START,
+					ehea_mr_len, acc_ctrl, adapter->pd,
 					&mr->handle, &mr->lkey);
+
 	if (hret != H_SUCCESS) {
 		ehea_error("alloc_resource_mr failed");
 		ret = -EIO;
 		goto out;
 	}
 
-	for (i = 0 ; i < ehea_bmap.entries; i++)
-		if (ehea_bmap.vaddr[i]) {
-			void *sectbase = __va(i << SECTION_SIZE_BITS);
-			unsigned long k = 0;
-
-			for (j = 0; j < (EHEA_PAGES_PER_SECTION /
-					 EHEA_MAX_RPAGE); j++) {
-
-				for (m = 0; m < EHEA_MAX_RPAGE; m++) {
-					pg = sectbase + ((k++) * EHEA_PAGESIZE);
-					pt[m] = virt_to_abs(pg);
-				}
-
-				hret = ehea_h_register_rpage_mr(adapter->handle,
-								mr->handle,
-								0, 0, pt_abs,
-								EHEA_MAX_RPAGE);
-				if ((hret != H_SUCCESS)
-				    && (hret != H_PAGE_REGISTERED)) {
-					ehea_h_free_resource(adapter->handle,
-							     mr->handle,
-							     FORCE_FREE);
-					ehea_error("register_rpage_mr failed");
-					ret = -EIO;
-					goto out;
-				}
-			}
-		}
+	if (!ehea_bmap) {
+		ehea_h_free_resource(adapter->handle, mr->handle, FORCE_FREE);
+		ehea_error("no busmap available");
+		ret = -EIO;
+		goto out;
+	}
+
+	for (top = 0; top < EHEA_MAP_ENTRIES; top++) {
+		if (!ehea_bmap->top[top])
+			continue;
+
+		hret = ehea_reg_mr_dir_sections(top, pt, adapter, mr);
+		if((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS))
+			break;
+	}
 
 	if (hret != H_SUCCESS) {
 		ehea_h_free_resource(adapter->handle, mr->handle, FORCE_FREE);
-- 
cgit v0.10.2


From b9b39b625cf57cd0ea998717598b68963cbec3cb Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 7 May 2008 12:51:12 +0100
Subject: [netdrvr] sfc: Add TSO support

The SFC4000 controller does not have hardware support for TSO, and the
core GSO code incurs a high cost in allocating and freeing skbs.  This
TSO implementation uses lightweight packet header structures and is
substantially faster.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index 59edcf7..418f2e5 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -1873,6 +1873,7 @@ static int efx_init_struct(struct efx_nic *efx, struct efx_nic_type *type,
 		tx_queue->queue = i;
 		tx_queue->buffer = NULL;
 		tx_queue->channel = &efx->channel[0]; /* for safety */
+		tx_queue->tso_headers_free = NULL;
 	}
 	for (i = 0; i < EFX_MAX_RX_QUEUES; i++) {
 		rx_queue = &efx->rx_queue[i];
@@ -2071,7 +2072,8 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
 	net_dev = alloc_etherdev(sizeof(*efx));
 	if (!net_dev)
 		return -ENOMEM;
-	net_dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA;
+	net_dev->features |= (NETIF_F_IP_CSUM | NETIF_F_SG |
+			      NETIF_F_HIGHDMA | NETIF_F_TSO);
 	if (lro)
 		net_dev->features |= NETIF_F_LRO;
 	efx = net_dev->priv;
diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c
index ad541ba..b756840 100644
--- a/drivers/net/sfc/ethtool.c
+++ b/drivers/net/sfc/ethtool.c
@@ -272,6 +272,22 @@ static void efx_ethtool_get_stats(struct net_device *net_dev,
 	}
 }
 
+static int efx_ethtool_set_tso(struct net_device *net_dev, u32 enable)
+{
+	int rc;
+
+	/* Our TSO requires TX checksumming, so force TX checksumming
+	 * on when TSO is enabled.
+	 */
+	if (enable) {
+		rc = efx_ethtool_set_tx_csum(net_dev, 1);
+		if (rc)
+			return rc;
+	}
+
+	return ethtool_op_set_tso(net_dev, enable);
+}
+
 static int efx_ethtool_set_tx_csum(struct net_device *net_dev, u32 enable)
 {
 	struct efx_nic *efx = net_dev->priv;
@@ -283,6 +299,15 @@ static int efx_ethtool_set_tx_csum(struct net_device *net_dev, u32 enable)
 
 	efx_flush_queues(efx);
 
+	/* Our TSO requires TX checksumming, so disable TSO when
+	 * checksumming is disabled
+	 */
+	if (!enable) {
+		rc = efx_ethtool_set_tso(net_dev, 0);
+		if (rc)
+			return rc;
+	}
+
 	return 0;
 }
 
@@ -451,6 +476,8 @@ struct ethtool_ops efx_ethtool_ops = {
 	.set_tx_csum		= efx_ethtool_set_tx_csum,
 	.get_sg			= ethtool_op_get_sg,
 	.set_sg			= ethtool_op_set_sg,
+	.get_tso		= ethtool_op_get_tso,
+	.set_tso		= efx_ethtool_set_tso,
 	.get_flags		= ethtool_op_get_flags,
 	.set_flags		= ethtool_op_set_flags,
 	.get_strings		= efx_ethtool_get_strings,
diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h
index c505482..6ffa711 100644
--- a/drivers/net/sfc/net_driver.h
+++ b/drivers/net/sfc/net_driver.h
@@ -134,6 +134,8 @@ struct efx_special_buffer {
  *	Set only on the final fragment of a packet; %NULL for all other
  *	fragments.  When this fragment completes, then we can free this
  *	skb.
+ * @tsoh: The associated TSO header structure, or %NULL if this
+ *	buffer is not a TSO header.
  * @dma_addr: DMA address of the fragment.
  * @len: Length of this fragment.
  *	This field is zero when the queue slot is empty.
@@ -144,6 +146,7 @@ struct efx_special_buffer {
  */
 struct efx_tx_buffer {
 	const struct sk_buff *skb;
+	struct efx_tso_header *tsoh;
 	dma_addr_t dma_addr;
 	unsigned short len;
 	unsigned char continuation;
@@ -187,6 +190,13 @@ struct efx_tx_buffer {
  *	variable indicates that the queue is full.  This is to
  *	avoid cache-line ping-pong between the xmit path and the
  *	completion path.
+ * @tso_headers_free: A list of TSO headers allocated for this TX queue
+ *	that are not in use, and so available for new TSO sends. The list
+ *	is protected by the TX queue lock.
+ * @tso_bursts: Number of times TSO xmit invoked by kernel
+ * @tso_long_headers: Number of packets with headers too long for standard
+ *	blocks
+ * @tso_packets: Number of packets via the TSO xmit path
  */
 struct efx_tx_queue {
 	/* Members which don't change on the fast path */
@@ -206,6 +216,10 @@ struct efx_tx_queue {
 	unsigned int insert_count ____cacheline_aligned_in_smp;
 	unsigned int write_count;
 	unsigned int old_read_count;
+	struct efx_tso_header *tso_headers_free;
+	unsigned int tso_bursts;
+	unsigned int tso_long_headers;
+	unsigned int tso_packets;
 };
 
 /**
diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c
index fbb866b..9b436f5 100644
--- a/drivers/net/sfc/tx.c
+++ b/drivers/net/sfc/tx.c
@@ -82,6 +82,46 @@ static inline void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
 	}
 }
 
+/**
+ * struct efx_tso_header - a DMA mapped buffer for packet headers
+ * @next: Linked list of free ones.
+ *	The list is protected by the TX queue lock.
+ * @unmap_len: Length to unmap for an oversize buffer, or 0.
+ * @dma_addr: The DMA address of the header below.
+ *
+ * This controls the memory used for a TSO header.  Use TSOH_BUFFER()
+ * to find the packet header data.  Use TSOH_SIZE() to calculate the
+ * total size required for a given packet header length.  TSO headers
+ * in the free list are exactly %TSOH_STD_SIZE bytes in size.
+ */
+struct efx_tso_header {
+	union {
+		struct efx_tso_header *next;
+		size_t unmap_len;
+	};
+	dma_addr_t dma_addr;
+};
+
+static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
+			       const struct sk_buff *skb);
+static void efx_fini_tso(struct efx_tx_queue *tx_queue);
+static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
+			       struct efx_tso_header *tsoh);
+
+static inline void efx_tsoh_free(struct efx_tx_queue *tx_queue,
+				 struct efx_tx_buffer *buffer)
+{
+	if (buffer->tsoh) {
+		if (likely(!buffer->tsoh->unmap_len)) {
+			buffer->tsoh->next = tx_queue->tso_headers_free;
+			tx_queue->tso_headers_free = buffer->tsoh;
+		} else {
+			efx_tsoh_heap_free(tx_queue, buffer->tsoh);
+		}
+		buffer->tsoh = NULL;
+	}
+}
+
 
 /*
  * Add a socket buffer to a TX queue
@@ -114,6 +154,9 @@ static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
 
 	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
 
+	if (skb_shinfo((struct sk_buff *)skb)->gso_size)
+		return efx_enqueue_skb_tso(tx_queue, skb);
+
 	/* Get size of the initial fragment */
 	len = skb_headlen(skb);
 
@@ -166,6 +209,8 @@ static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
 			insert_ptr = (tx_queue->insert_count &
 				      efx->type->txd_ring_mask);
 			buffer = &tx_queue->buffer[insert_ptr];
+			efx_tsoh_free(tx_queue, buffer);
+			EFX_BUG_ON_PARANOID(buffer->tsoh);
 			EFX_BUG_ON_PARANOID(buffer->skb);
 			EFX_BUG_ON_PARANOID(buffer->len);
 			EFX_BUG_ON_PARANOID(buffer->continuation != 1);
@@ -432,6 +477,9 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
 
 	efx_release_tx_buffers(tx_queue);
 
+	/* Free up TSO header cache */
+	efx_fini_tso(tx_queue);
+
 	/* Release queue's stop on port, if any */
 	if (tx_queue->stopped) {
 		tx_queue->stopped = 0;
@@ -450,3 +498,619 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
 }
 
 
+/* Efx TCP segmentation acceleration.
+ *
+ * Why?  Because by doing it here in the driver we can go significantly
+ * faster than the GSO.
+ *
+ * Requires TX checksum offload support.
+ */
+
+/* Number of bytes inserted at the start of a TSO header buffer,
+ * similar to NET_IP_ALIGN.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#define TSOH_OFFSET	0
+#else
+#define TSOH_OFFSET	NET_IP_ALIGN
+#endif
+
+#define TSOH_BUFFER(tsoh)	((u8 *)((tsoh) + 1) + TSOH_OFFSET)
+
+/* Total size of struct efx_tso_header, buffer and padding */
+#define TSOH_SIZE(hdr_len)					\
+	(sizeof(struct efx_tso_header) + TSOH_OFFSET + (hdr_len))
+
+/* Size of blocks on free list.  Larger blocks must be allocated from
+ * the heap.
+ */
+#define TSOH_STD_SIZE		128
+
+#define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))
+#define ETH_HDR_LEN(skb)  (skb_network_header(skb) - (skb)->data)
+#define SKB_TCP_OFF(skb)  PTR_DIFF(tcp_hdr(skb), (skb)->data)
+#define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data)
+
+/**
+ * struct tso_state - TSO state for an SKB
+ * @remaining_len: Bytes of data we've yet to segment
+ * @seqnum: Current sequence number
+ * @packet_space: Remaining space in current packet
+ * @ifc: Input fragment cursor.
+ *	Where we are in the current fragment of the incoming SKB.  These
+ *	values get updated in place when we split a fragment over
+ *	multiple packets.
+ * @p: Parameters.
+ *	These values are set once at the start of the TSO send and do
+ *	not get changed as the routine progresses.
+ *
+ * The state used during segmentation.  It is put into this data structure
+ * just to make it easy to pass into inline functions.
+ */
+struct tso_state {
+	unsigned remaining_len;
+	unsigned seqnum;
+	unsigned packet_space;
+
+	struct {
+		/* DMA address of current position */
+		dma_addr_t dma_addr;
+		/* Remaining length */
+		unsigned int len;
+		/* DMA address and length of the whole fragment */
+		unsigned int unmap_len;
+		dma_addr_t unmap_addr;
+		struct page *page;
+		unsigned page_off;
+	} ifc;
+
+	struct {
+		/* The number of bytes of header */
+		unsigned int header_length;
+
+		/* The number of bytes to put in each outgoing segment. */
+		int full_packet_size;
+
+		/* Current IPv4 ID, host endian. */
+		unsigned ipv4_id;
+	} p;
+};
+
+
+/*
+ * Verify that our various assumptions about sk_buffs and the conditions
+ * under which TSO will be attempted hold true.
+ */
+static inline void efx_tso_check_safe(const struct sk_buff *skb)
+{
+	EFX_BUG_ON_PARANOID(skb->protocol != htons(ETH_P_IP));
+	EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
+			    skb->protocol);
+	EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
+	EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
+			     + (tcp_hdr(skb)->doff << 2u)) >
+			    skb_headlen(skb));
+}
+
+
+/*
+ * Allocate a page worth of efx_tso_header structures, and string them
+ * into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM.
+ */
+static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
+{
+
+	struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
+	struct efx_tso_header *tsoh;
+	dma_addr_t dma_addr;
+	u8 *base_kva, *kva;
+
+	base_kva = pci_alloc_consistent(pci_dev, PAGE_SIZE, &dma_addr);
+	if (base_kva == NULL) {
+		EFX_ERR(tx_queue->efx, "Unable to allocate page for TSO"
+			" headers\n");
+		return -ENOMEM;
+	}
+
+	/* pci_alloc_consistent() allocates pages. */
+	EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));
+
+	for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
+		tsoh = (struct efx_tso_header *)kva;
+		tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
+		tsoh->next = tx_queue->tso_headers_free;
+		tx_queue->tso_headers_free = tsoh;
+	}
+
+	return 0;
+}
+
+
+/* Free up a TSO header, and all others in the same page. */
+static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue,
+				struct efx_tso_header *tsoh,
+				struct pci_dev *pci_dev)
+{
+	struct efx_tso_header **p;
+	unsigned long base_kva;
+	dma_addr_t base_dma;
+
+	base_kva = (unsigned long)tsoh & PAGE_MASK;
+	base_dma = tsoh->dma_addr & PAGE_MASK;
+
+	p = &tx_queue->tso_headers_free;
+	while (*p != NULL)
+		if (((unsigned long)*p & PAGE_MASK) == base_kva)
+			*p = (*p)->next;
+		else
+			p = &(*p)->next;
+
+	pci_free_consistent(pci_dev, PAGE_SIZE, (void *)base_kva, base_dma);
+}
+
+static struct efx_tso_header *
+efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
+{
+	struct efx_tso_header *tsoh;
+
+	tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA);
+	if (unlikely(!tsoh))
+		return NULL;
+
+	tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev,
+					TSOH_BUFFER(tsoh), header_len,
+					PCI_DMA_TODEVICE);
+	if (unlikely(pci_dma_mapping_error(tsoh->dma_addr))) {
+		kfree(tsoh);
+		return NULL;
+	}
+
+	tsoh->unmap_len = header_len;
+	return tsoh;
+}
+
+static void
+efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
+{
+	pci_unmap_single(tx_queue->efx->pci_dev,
+			 tsoh->dma_addr, tsoh->unmap_len,
+			 PCI_DMA_TODEVICE);
+	kfree(tsoh);
+}
+
+/**
+ * efx_tx_queue_insert - push descriptors onto the TX queue
+ * @tx_queue:		Efx TX queue
+ * @dma_addr:		DMA address of fragment
+ * @len:		Length of fragment
+ * @skb:		Only non-null for end of last segment
+ * @end_of_packet:	True if last fragment in a packet
+ * @unmap_addr:		DMA address of fragment for unmapping
+ * @unmap_len:		Only set this in last segment of a fragment
+ *
+ * Push descriptors onto the TX queue.  Return 0 on success or 1 if
+ * @tx_queue full.
+ */
+static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
+			       dma_addr_t dma_addr, unsigned len,
+			       const struct sk_buff *skb, int end_of_packet,
+			       dma_addr_t unmap_addr, unsigned unmap_len)
+{
+	struct efx_tx_buffer *buffer;
+	struct efx_nic *efx = tx_queue->efx;
+	unsigned dma_len, fill_level, insert_ptr, misalign;
+	int q_space;
+
+	EFX_BUG_ON_PARANOID(len <= 0);
+
+	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
+	/* -1 as there is no way to represent all descriptors used */
+	q_space = efx->type->txd_ring_mask - 1 - fill_level;
+
+	while (1) {
+		if (unlikely(q_space-- <= 0)) {
+			/* It might be that completions have happened
+			 * since the xmit path last checked.  Update
+			 * the xmit path's copy of read_count.
+			 */
+			++tx_queue->stopped;
+			/* This memory barrier protects the change of
+			 * stopped from the access of read_count. */
+			smp_mb();
+			tx_queue->old_read_count =
+				*(volatile unsigned *)&tx_queue->read_count;
+			fill_level = (tx_queue->insert_count
+				      - tx_queue->old_read_count);
+			q_space = efx->type->txd_ring_mask - 1 - fill_level;
+			if (unlikely(q_space-- <= 0))
+				return 1;
+			smp_mb();
+			--tx_queue->stopped;
+		}
+
+		insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
+		buffer = &tx_queue->buffer[insert_ptr];
+		++tx_queue->insert_count;
+
+		EFX_BUG_ON_PARANOID(tx_queue->insert_count -
+				    tx_queue->read_count >
+				    efx->type->txd_ring_mask);
+
+		efx_tsoh_free(tx_queue, buffer);
+		EFX_BUG_ON_PARANOID(buffer->len);
+		EFX_BUG_ON_PARANOID(buffer->unmap_len);
+		EFX_BUG_ON_PARANOID(buffer->skb);
+		EFX_BUG_ON_PARANOID(buffer->continuation != 1);
+		EFX_BUG_ON_PARANOID(buffer->tsoh);
+
+		buffer->dma_addr = dma_addr;
+
+		/* Ensure we do not cross a boundary unsupported by H/W */
+		dma_len = (~dma_addr & efx->type->tx_dma_mask) + 1;
+
+		misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
+		if (misalign && dma_len + misalign > 512)
+			dma_len = 512 - misalign;
+
+		/* If there is enough space to send then do so */
+		if (dma_len >= len)
+			break;
+
+		buffer->len = dma_len; /* Don't set the other members */
+		dma_addr += dma_len;
+		len -= dma_len;
+	}
+
+	EFX_BUG_ON_PARANOID(!len);
+	buffer->len = len;
+	buffer->skb = skb;
+	buffer->continuation = !end_of_packet;
+	buffer->unmap_addr = unmap_addr;
+	buffer->unmap_len = unmap_len;
+	return 0;
+}
+
+
+/*
+ * Put a TSO header into the TX queue.
+ *
+ * This is special-cased because we know that it is small enough to fit in
+ * a single fragment, and we know it doesn't cross a page boundary.  It
+ * also allows us to not worry about end-of-packet etc.
+ */
+static inline void efx_tso_put_header(struct efx_tx_queue *tx_queue,
+				      struct efx_tso_header *tsoh, unsigned len)
+{
+	struct efx_tx_buffer *buffer;
+
+	buffer = &tx_queue->buffer[tx_queue->insert_count &
+				   tx_queue->efx->type->txd_ring_mask];
+	efx_tsoh_free(tx_queue, buffer);
+	EFX_BUG_ON_PARANOID(buffer->len);
+	EFX_BUG_ON_PARANOID(buffer->unmap_len);
+	EFX_BUG_ON_PARANOID(buffer->skb);
+	EFX_BUG_ON_PARANOID(buffer->continuation != 1);
+	EFX_BUG_ON_PARANOID(buffer->tsoh);
+	buffer->len = len;
+	buffer->dma_addr = tsoh->dma_addr;
+	buffer->tsoh = tsoh;
+
+	++tx_queue->insert_count;
+}
+
+
+/* Remove descriptors put into a tx_queue. */
+static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
+{
+	struct efx_tx_buffer *buffer;
+
+	/* Work backwards until we hit the original insert pointer value */
+	while (tx_queue->insert_count != tx_queue->write_count) {
+		--tx_queue->insert_count;
+		buffer = &tx_queue->buffer[tx_queue->insert_count &
+					   tx_queue->efx->type->txd_ring_mask];
+		efx_tsoh_free(tx_queue, buffer);
+		EFX_BUG_ON_PARANOID(buffer->skb);
+		buffer->len = 0;
+		buffer->continuation = 1;
+		if (buffer->unmap_len) {
+			pci_unmap_page(tx_queue->efx->pci_dev,
+				       buffer->unmap_addr,
+				       buffer->unmap_len, PCI_DMA_TODEVICE);
+			buffer->unmap_len = 0;
+		}
+	}
+}
+
+
+/* Parse the SKB header and initialise state. */
+static inline void tso_start(struct tso_state *st, const struct sk_buff *skb)
+{
+	/* All ethernet/IP/TCP headers combined size is TCP header size
+	 * plus offset of TCP header relative to start of packet.
+	 */
+	st->p.header_length = ((tcp_hdr(skb)->doff << 2u)
+			       + PTR_DIFF(tcp_hdr(skb), skb->data));
+	st->p.full_packet_size = (st->p.header_length
+				  + skb_shinfo(skb)->gso_size);
+
+	st->p.ipv4_id = ntohs(ip_hdr(skb)->id);
+	st->seqnum = ntohl(tcp_hdr(skb)->seq);
+
+	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
+	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
+	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);
+
+	st->packet_space = st->p.full_packet_size;
+	st->remaining_len = skb->len - st->p.header_length;
+}
+
+
+/**
+ * tso_get_fragment - record fragment details and map for DMA
+ * @st:			TSO state
+ * @efx:		Efx NIC
+ * @len:		Length of fragment
+ * @page:		Page containing the fragment data
+ * @page_off:		Offset of the fragment within @page
+ *
+ * Return 0 on success, or -%ENOMEM if DMA mapping fails.
+ */
+static inline int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
+				   int len, struct page *page, int page_off)
+{
+
+	st->ifc.unmap_addr = pci_map_page(efx->pci_dev, page, page_off,
+					  len, PCI_DMA_TODEVICE);
+	if (likely(!pci_dma_mapping_error(st->ifc.unmap_addr))) {
+		st->ifc.unmap_len = len;
+		st->ifc.len = len;
+		st->ifc.dma_addr = st->ifc.unmap_addr;
+		st->ifc.page = page;
+		st->ifc.page_off = page_off;
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+
+/**
+ * tso_fill_packet_with_fragment - form descriptors for the current fragment
+ * @tx_queue:		Efx TX queue
+ * @skb:		Socket buffer
+ * @st:			TSO state
+ *
+ * Form descriptors for the current fragment, until we reach the end
+ * of fragment or end-of-packet.  Return 0 on success, 1 if not enough
+ * space in @tx_queue.
+ */
+static inline int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
+						const struct sk_buff *skb,
+						struct tso_state *st)
+{
+
+	int n, end_of_packet, rc;
+
+	if (st->ifc.len == 0)
+		return 0;
+	if (st->packet_space == 0)
+		return 0;
+
+	EFX_BUG_ON_PARANOID(st->ifc.len <= 0);
+	EFX_BUG_ON_PARANOID(st->packet_space <= 0);
+
+	n = min(st->ifc.len, st->packet_space);
+
+	st->packet_space -= n;
+	st->remaining_len -= n;
+	st->ifc.len -= n;
+	st->ifc.page_off += n;
+	end_of_packet = st->remaining_len == 0 || st->packet_space == 0;
+
+	rc = efx_tx_queue_insert(tx_queue, st->ifc.dma_addr, n,
+				 st->remaining_len ? NULL : skb,
+				 end_of_packet, st->ifc.unmap_addr,
+				 st->ifc.len ? 0 : st->ifc.unmap_len);
+
+	st->ifc.dma_addr += n;
+
+	return rc;
+}
+
+
+/**
+ * tso_start_new_packet - generate a new header and prepare for the new packet
+ * @tx_queue:		Efx TX queue
+ * @skb:		Socket buffer
+ * @st:			TSO state
+ *
+ * Generate a new header and prepare for the new packet.  Return 0 on
+ * success, or -1 if failed to alloc header.
+ */
+static inline int tso_start_new_packet(struct efx_tx_queue *tx_queue,
+				       const struct sk_buff *skb,
+				       struct tso_state *st)
+{
+	struct efx_tso_header *tsoh;
+	struct iphdr *tsoh_iph;
+	struct tcphdr *tsoh_th;
+	unsigned ip_length;
+	u8 *header;
+
+	/* Allocate a DMA-mapped header buffer. */
+	if (likely(TSOH_SIZE(st->p.header_length) <= TSOH_STD_SIZE)) {
+		if (tx_queue->tso_headers_free == NULL)
+			if (efx_tsoh_block_alloc(tx_queue))
+				return -1;
+		EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
+		tsoh = tx_queue->tso_headers_free;
+		tx_queue->tso_headers_free = tsoh->next;
+		tsoh->unmap_len = 0;
+	} else {
+		tx_queue->tso_long_headers++;
+		tsoh = efx_tsoh_heap_alloc(tx_queue, st->p.header_length);
+		if (unlikely(!tsoh))
+			return -1;
+	}
+
+	header = TSOH_BUFFER(tsoh);
+	tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));
+	tsoh_iph = (struct iphdr *)(header + SKB_IPV4_OFF(skb));
+
+	/* Copy and update the headers. */
+	memcpy(header, skb->data, st->p.header_length);
+
+	tsoh_th->seq = htonl(st->seqnum);
+	st->seqnum += skb_shinfo(skb)->gso_size;
+	if (st->remaining_len > skb_shinfo(skb)->gso_size) {
+		/* This packet will not finish the TSO burst. */
+		ip_length = st->p.full_packet_size - ETH_HDR_LEN(skb);
+		tsoh_th->fin = 0;
+		tsoh_th->psh = 0;
+	} else {
+		/* This packet will be the last in the TSO burst. */
+		ip_length = (st->p.header_length - ETH_HDR_LEN(skb)
+			     + st->remaining_len);
+		tsoh_th->fin = tcp_hdr(skb)->fin;
+		tsoh_th->psh = tcp_hdr(skb)->psh;
+	}
+	tsoh_iph->tot_len = htons(ip_length);
+
+	/* Linux leaves suitable gaps in the IP ID space for us to fill. */
+	tsoh_iph->id = htons(st->p.ipv4_id);
+	st->p.ipv4_id++;
+
+	st->packet_space = skb_shinfo(skb)->gso_size;
+	++tx_queue->tso_packets;
+
+	/* Form a descriptor for this header. */
+	efx_tso_put_header(tx_queue, tsoh, st->p.header_length);
+
+	return 0;
+}
+
+
+/**
+ * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
+ * @tx_queue:		Efx TX queue
+ * @skb:		Socket buffer
+ *
+ * Context: You must hold netif_tx_lock() to call this function.
+ *
+ * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
+ * @skb was not enqueued.  In all cases @skb is consumed.  Return
+ * %NETDEV_TX_OK or %NETDEV_TX_BUSY.
+ */
+static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
+			       const struct sk_buff *skb)
+{
+	int frag_i, rc, rc2 = NETDEV_TX_OK;
+	struct tso_state state;
+	skb_frag_t *f;
+
+	/* Verify TSO is safe - these checks should never fail. */
+	efx_tso_check_safe(skb);
+
+	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
+
+	tso_start(&state, skb);
+
+	/* Assume that skb header area contains exactly the headers, and
+	 * all payload is in the frag list.
+	 */
+	if (skb_headlen(skb) == state.p.header_length) {
+		/* Grab the first payload fragment. */
+		EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
+		frag_i = 0;
+		f = &skb_shinfo(skb)->frags[frag_i];
+		rc = tso_get_fragment(&state, tx_queue->efx,
+				      f->size, f->page, f->page_offset);
+		if (rc)
+			goto mem_err;
+	} else {
+		/* It may look like this code fragment assumes that the
+		 * skb->data portion does not cross a page boundary, but
+		 * that is not the case.  It is guaranteed to be direct
+		 * mapped memory, and therefore is physically contiguous,
+		 * and so DMA will work fine.  kmap_atomic() on this region
+		 * will just return the direct mapping, so that will work
+		 * too.
+		 */
+		int page_off = (unsigned long)skb->data & (PAGE_SIZE - 1);
+		int hl = state.p.header_length;
+		rc = tso_get_fragment(&state, tx_queue->efx,
+				      skb_headlen(skb) - hl,
+				      virt_to_page(skb->data), page_off + hl);
+		if (rc)
+			goto mem_err;
+		frag_i = -1;
+	}
+
+	if (tso_start_new_packet(tx_queue, skb, &state) < 0)
+		goto mem_err;
+
+	while (1) {
+		rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
+		if (unlikely(rc))
+			goto stop;
+
+		/* Move onto the next fragment? */
+		if (state.ifc.len == 0) {
+			if (++frag_i >= skb_shinfo(skb)->nr_frags)
+				/* End of payload reached. */
+				break;
+			f = &skb_shinfo(skb)->frags[frag_i];
+			rc = tso_get_fragment(&state, tx_queue->efx,
+					      f->size, f->page, f->page_offset);
+			if (rc)
+				goto mem_err;
+		}
+
+		/* Start a new packet? */
+		if (state.packet_space == 0 &&
+		    tso_start_new_packet(tx_queue, skb, &state) < 0)
+			goto mem_err;
+	}
+
+	/* Pass off to hardware */
+	falcon_push_buffers(tx_queue);
+
+	tx_queue->tso_bursts++;
+	return NETDEV_TX_OK;
+
+ mem_err:
+	EFX_ERR(tx_queue->efx, "Out of memory for TSO headers, or PCI mapping"
+		" error\n");
+	dev_kfree_skb_any((struct sk_buff *)skb);
+	goto unwind;
+
+ stop:
+	rc2 = NETDEV_TX_BUSY;
+
+	/* Stop the queue if it wasn't stopped before. */
+	if (tx_queue->stopped == 1)
+		efx_stop_queue(tx_queue->efx);
+
+ unwind:
+	efx_enqueue_unwind(tx_queue);
+	return rc2;
+}
+
+
+/*
+ * Free up all TSO datastructures associated with tx_queue. This
+ * routine should be called only once the tx_queue is both empty and
+ * will no longer be used.
+ */
+static void efx_fini_tso(struct efx_tx_queue *tx_queue)
+{
+	unsigned i;
+
+	if (tx_queue->buffer)
+		for (i = 0; i <= tx_queue->efx->type->txd_ring_mask; ++i)
+			efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
+
+	while (tx_queue->tso_headers_free != NULL)
+		efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
+				    tx_queue->efx->pci_dev);
+}
-- 
cgit v0.10.2


From 75f2d3eac93277fa022b2fbe51257e856575e757 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 7 May 2008 12:55:13 +0100
Subject: [netdrvr] sfc: Add phy_flash_cfg module parameter and implementation

The 10Xpress PHY supports flash upgrades through MDIO, but needs to be
put in upgrade mode at power-up.  This adds a module parameter and other
logic to support that.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/sfc/boards.h b/drivers/net/sfc/boards.h
index f56341d..695764d 100644
--- a/drivers/net/sfc/boards.h
+++ b/drivers/net/sfc/boards.h
@@ -22,5 +22,7 @@ enum efx_board_type {
 extern int efx_set_board_info(struct efx_nic *efx, u16 revision_info);
 extern int sfe4001_poweron(struct efx_nic *efx);
 extern void sfe4001_poweroff(struct efx_nic *efx);
+/* Are we putting the PHY into flash config mode */
+extern unsigned int sfe4001_phy_flash_cfg;
 
 #endif
diff --git a/drivers/net/sfc/falcon_xmac.c b/drivers/net/sfc/falcon_xmac.c
index aa7521b..d99efe2 100644
--- a/drivers/net/sfc/falcon_xmac.c
+++ b/drivers/net/sfc/falcon_xmac.c
@@ -69,6 +69,10 @@ static int falcon_reset_xmac(struct efx_nic *efx)
 		udelay(10);
 	}
 
+	/* This often fails when DSP is disabled, ignore it */
+	if (sfe4001_phy_flash_cfg != 0)
+		return 0;
+
 	EFX_ERR(efx, "timed out waiting for XMAC core reset\n");
 	return -ETIMEDOUT;
 }
diff --git a/drivers/net/sfc/sfe4001.c b/drivers/net/sfc/sfe4001.c
index 11fa9fb..725d1a5 100644
--- a/drivers/net/sfc/sfe4001.c
+++ b/drivers/net/sfc/sfe4001.c
@@ -130,6 +130,15 @@ void sfe4001_poweroff(struct efx_nic *efx)
 	(void) efx_i2c_read(i2c, MAX6647, RSL, &in, 1);
 }
 
+/* The P0_EN_3V3X line on SFE4001 boards (from A2 onward) is connected
+ * to the FLASH_CFG_1 input on the DSP.  We must keep it high at power-
+ * up to allow writing the flash (done through MDIO from userland).
+ */
+unsigned int sfe4001_phy_flash_cfg;
+module_param_named(phy_flash_cfg, sfe4001_phy_flash_cfg, uint, 0444);
+MODULE_PARM_DESC(phy_flash_cfg,
+		 "Force PHY to enter flash configuration mode");
+
 /* This board uses an I2C expander to provider power to the PHY, which needs to
  * be turned on before the PHY can be used.
  * Context: Process context, rtnl lock held
@@ -203,6 +212,8 @@ int sfe4001_poweron(struct efx_nic *efx)
 		out = 0xff & ~((1 << P0_EN_1V2_LBN) | (1 << P0_EN_2V5_LBN) |
 			       (1 << P0_EN_3V3X_LBN) | (1 << P0_EN_5V_LBN) |
 			       (1 << P0_X_TRST_LBN));
+		if (sfe4001_phy_flash_cfg)
+			out |= 1 << P0_EN_3V3X_LBN;
 
 		rc = efx_i2c_write(i2c, PCA9539, P0_OUT, &out, 1);
 		if (rc)
@@ -226,6 +237,9 @@ int sfe4001_poweron(struct efx_nic *efx)
 		if (in & (1 << P1_AFE_PWD_LBN))
 			goto done;
 
+		/* DSP doesn't look powered in flash config mode */
+		if (sfe4001_phy_flash_cfg)
+			goto done;
 	} while (++count < 20);
 
 	EFX_INFO(efx, "timed out waiting for power\n");
diff --git a/drivers/net/sfc/tenxpress.c b/drivers/net/sfc/tenxpress.c
index a2e9f79..d8df031 100644
--- a/drivers/net/sfc/tenxpress.c
+++ b/drivers/net/sfc/tenxpress.c
@@ -199,10 +199,12 @@ static int tenxpress_phy_init(struct efx_nic *efx)
 
 	tenxpress_set_state(efx, TENXPRESS_STATUS_NORMAL);
 
-	rc = mdio_clause45_wait_reset_mmds(efx,
-					   TENXPRESS_REQUIRED_DEVS);
-	if (rc < 0)
-		goto fail;
+	if (!sfe4001_phy_flash_cfg) {
+		rc = mdio_clause45_wait_reset_mmds(efx,
+						   TENXPRESS_REQUIRED_DEVS);
+		if (rc < 0)
+			goto fail;
+	}
 
 	rc = mdio_clause45_check_mmds(efx, TENXPRESS_REQUIRED_DEVS, 0);
 	if (rc < 0)
-- 
cgit v0.10.2


From ba911a4d16fb2dd562f5595731fc96bc8c4929d7 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 7 May 2008 12:56:57 +0100
Subject: [netdrvr] sfc: Removed bogus 'fall-thru' comments

Fall-through is expected outside a switch statement.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/sfc/falcon.c b/drivers/net/sfc/falcon.c
index 46db549..9cac344 100644
--- a/drivers/net/sfc/falcon.c
+++ b/drivers/net/sfc/falcon.c
@@ -2468,14 +2468,12 @@ int falcon_probe_nic(struct efx_nic *efx)
  fail5:
 	falcon_free_buffer(efx, &efx->irq_status);
  fail4:
-	/* fall-thru */
  fail3:
 	if (nic_data->pci_dev2) {
 		pci_dev_put(nic_data->pci_dev2);
 		nic_data->pci_dev2 = NULL;
 	}
  fail2:
-	/* fall-thru */
  fail1:
 	kfree(efx->nic_data);
 	return rc;
diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c
index 551299b..9fd1984 100644
--- a/drivers/net/sfc/rx.c
+++ b/drivers/net/sfc/rx.c
@@ -736,7 +736,6 @@ void __efx_rx_packet(struct efx_channel *channel,
 	/* Update allocation strategy method */
 	channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB;
 
-	/* fall-thru */
 done:
 	efx->net_dev->last_rx = jiffies;
 }
-- 
cgit v0.10.2


From 707d982700c4cde83913f23eb6430a5bb435122a Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 7 May 2008 12:57:44 +0100
Subject: [netdrvr] sfc: Remove garbage from comment

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/sfc/mdio_10g.h b/drivers/net/sfc/mdio_10g.h
index 2214b6d..338c62c 100644
--- a/drivers/net/sfc/mdio_10g.h
+++ b/drivers/net/sfc/mdio_10g.h
@@ -95,7 +95,7 @@
 #define MDIO_PMAPMD_CTRL2_10_BT		(0xf)
 #define MDIO_PMAPMD_CTRL2_TYPE_MASK	(0xf)
 
-/* /\* PHY XGXS lane state *\/ */
+/* PHY XGXS lane state */
 #define MDIO_PHYXS_LANE_STATE		(0x18)
 #define MDIO_PHYXS_LANE_ALIGNED_LBN	(12)
 
-- 
cgit v0.10.2


From d6742d4a6dfc362b5dbb3e759e6198c3dbb47dbc Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 7 May 2008 12:58:13 +0100
Subject: [netdrvr] sfc: Remove kernel-doc comments for removed members of
 struct efx_nic

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h
index 6ffa711..9c285fb 100644
--- a/drivers/net/sfc/net_driver.h
+++ b/drivers/net/sfc/net_driver.h
@@ -667,8 +667,6 @@ union efx_multicast_hash {
  * @phy_op: PHY interface
  * @phy_data: PHY private data (including PHY-specific stats)
  * @mii: PHY interface
- * @phy_powered: PHY power state
- * @tx_disabled: PHY transmitter turned off
  * @link_up: Link status
  * @link_options: Link options (MII/GMII format)
  * @n_link_state_changes: Number of times the link has changed state
-- 
cgit v0.10.2


From e52eddaece487b0855f5974ee0a0a3a172043ba8 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 7 May 2008 12:58:41 +0100
Subject: [netdrvr] sfc: Fix code formatting

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/sfc/falcon_xmac.c b/drivers/net/sfc/falcon_xmac.c
index d99efe2..8c41662 100644
--- a/drivers/net/sfc/falcon_xmac.c
+++ b/drivers/net/sfc/falcon_xmac.c
@@ -32,7 +32,7 @@
 	(FALCON_XMAC_REGBANK + ((mac_reg) * FALCON_XMAC_REG_SIZE))
 
 void falcon_xmac_writel(struct efx_nic *efx,
-			efx_dword_t *value, unsigned int mac_reg)
+			 efx_dword_t *value, unsigned int mac_reg)
 {
 	efx_oword_t temp;
 
@@ -227,7 +227,7 @@ static int falcon_xgmii_status(struct efx_nic *efx)
 	/* The ISR latches, so clear it and re-read */
 	falcon_xmac_readl(efx, &reg, XM_MGT_INT_REG_MAC_B0);
 	falcon_xmac_readl(efx, &reg, XM_MGT_INT_REG_MAC_B0);
-	
+
 	if (EFX_DWORD_FIELD(reg, XM_LCLFLT) ||
 	    EFX_DWORD_FIELD(reg, XM_RMTFLT)) {
 		EFX_INFO(efx, "MGT_INT: "EFX_DWORD_FMT"\n", EFX_DWORD_VAL(reg));
-- 
cgit v0.10.2


From 53269e94cdaca6e470c18099912de977a193e815 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 7 May 2008 12:59:10 +0100
Subject: [netdrvr] sfc: Remove unused macro EFX_XAUI_RETRAIN_MAX

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/sfc/falcon_xmac.c b/drivers/net/sfc/falcon_xmac.c
index 8c41662..b875c7b 100644
--- a/drivers/net/sfc/falcon_xmac.c
+++ b/drivers/net/sfc/falcon_xmac.c
@@ -495,8 +495,6 @@ void falcon_update_stats_xmac(struct efx_nic *efx)
 		(mac_stats->rx_bytes - mac_stats->rx_good_bytes);
 }
 
-#define EFX_XAUI_RETRAIN_MAX 8
-
 int falcon_check_xmac(struct efx_nic *efx)
 {
 	unsigned xaui_link_ok;
-- 
cgit v0.10.2


From 05e3ec04460180f48810cddc2f78e80a725657ad Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 7 May 2008 13:00:39 +0100
Subject: [netdrvr] sfc: Increment rx_reset when reported as driver event

An RX_RESET event can be reported either as a global or as a driver event.
We were counting only global events.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/sfc/falcon.c b/drivers/net/sfc/falcon.c
index 9cac344..247629c 100644
--- a/drivers/net/sfc/falcon.c
+++ b/drivers/net/sfc/falcon.c
@@ -1129,6 +1129,7 @@ static void falcon_handle_driver_event(struct efx_channel *channel,
 	case RX_RECOVERY_EV_DECODE:
 		EFX_ERR(efx, "channel %d seen DRIVER RX_RESET event. "
 			"Resetting.\n", channel->channel);
+		atomic_inc(&efx->rx_reset);
 		efx_schedule_reset(efx,
 				   EFX_WORKAROUND_6555(efx) ?
 				   RESET_TYPE_RX_RECOVERY :
-- 
cgit v0.10.2


From 3273c2e8c66a21ae1c53b0c730ee937c6efde7e2 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 7 May 2008 13:36:19 +0100
Subject: [netdrvr] sfc: sfc: Add self-test support

Add a set of self-tests accessible through ethtool.
Add hardware loopback and TX disable control code to support them.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/sfc/Makefile b/drivers/net/sfc/Makefile
index 0f02344..1d2daee 100644
--- a/drivers/net/sfc/Makefile
+++ b/drivers/net/sfc/Makefile
@@ -1,5 +1,5 @@
 sfc-y			+= efx.o falcon.o tx.o rx.o falcon_xmac.o \
-			   i2c-direct.o ethtool.o xfp_phy.o mdio_10g.o \
-			   tenxpress.o boards.o sfe4001.o
+			   i2c-direct.o selftest.o ethtool.o xfp_phy.o \
+			   mdio_10g.o tenxpress.o boards.o sfe4001.o
 
 obj-$(CONFIG_SFC)	+= sfc.o
diff --git a/drivers/net/sfc/enum.h b/drivers/net/sfc/enum.h
index 43663a4..c53290d 100644
--- a/drivers/net/sfc/enum.h
+++ b/drivers/net/sfc/enum.h
@@ -10,6 +10,55 @@
 #ifndef EFX_ENUM_H
 #define EFX_ENUM_H
 
+/**
+ * enum efx_loopback_mode - loopback modes
+ * @LOOPBACK_NONE: no loopback
+ * @LOOPBACK_XGMII: loopback within MAC at XGMII level
+ * @LOOPBACK_XGXS: loopback within MAC at XGXS level
+ * @LOOPBACK_XAUI: loopback within MAC at XAUI level
+ * @LOOPBACK_PHYXS: loopback within PHY at PHYXS level
+ * @LOOPBACK_PCS: loopback within PHY at PCS level
+ * @LOOPBACK_PMAPMD: loopback within PHY at PMAPMD level
+ * @LOOPBACK_NETWORK: reflecting loopback (even further than furthest!)
+ */
+/* Please keep in order and up-to-date w.r.t the following two #defines */
+enum efx_loopback_mode {
+	LOOPBACK_NONE = 0,
+	LOOPBACK_MAC = 1,
+	LOOPBACK_XGMII = 2,
+	LOOPBACK_XGXS = 3,
+	LOOPBACK_XAUI = 4,
+	LOOPBACK_PHY = 5,
+	LOOPBACK_PHYXS = 6,
+	LOOPBACK_PCS = 7,
+	LOOPBACK_PMAPMD = 8,
+	LOOPBACK_NETWORK = 9,
+	LOOPBACK_MAX
+};
+
+#define LOOPBACK_TEST_MAX LOOPBACK_PMAPMD
+
+extern const char *efx_loopback_mode_names[];
+#define LOOPBACK_MODE_NAME(mode)			\
+	STRING_TABLE_LOOKUP(mode, efx_loopback_mode)
+#define LOOPBACK_MODE(efx)				\
+	LOOPBACK_MODE_NAME(efx->loopback_mode)
+
+/* These loopbacks occur within the controller */
+#define LOOPBACKS_10G_INTERNAL ((1 << LOOPBACK_XGMII)| \
+				(1 << LOOPBACK_XGXS) | \
+				(1 << LOOPBACK_XAUI))
+
+#define LOOPBACK_MASK(_efx)			\
+	(1 << (_efx)->loopback_mode)
+
+#define LOOPBACK_INTERNAL(_efx)						\
+	((LOOPBACKS_10G_INTERNAL & LOOPBACK_MASK(_efx)) ? 1 : 0)
+
+#define LOOPBACK_OUT_OF(_from, _to, _mask)		\
+	(((LOOPBACK_MASK(_from) & (_mask)) &&		\
+	  ((LOOPBACK_MASK(_to) & (_mask)) == 0)) ? 1 : 0)
+
 /*****************************************************************************/
 
 /**
diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c
index b756840..e2c75d1 100644
--- a/drivers/net/sfc/ethtool.c
+++ b/drivers/net/sfc/ethtool.c
@@ -12,12 +12,26 @@
 #include <linux/ethtool.h>
 #include <linux/rtnetlink.h>
 #include "net_driver.h"
+#include "selftest.h"
 #include "efx.h"
 #include "ethtool.h"
 #include "falcon.h"
 #include "gmii.h"
 #include "mac.h"
 
+const char *efx_loopback_mode_names[] = {
+	[LOOPBACK_NONE]		= "NONE",
+	[LOOPBACK_MAC]		= "MAC",
+	[LOOPBACK_XGMII]	= "XGMII",
+	[LOOPBACK_XGXS]		= "XGXS",
+	[LOOPBACK_XAUI] 	= "XAUI",
+	[LOOPBACK_PHY]		= "PHY",
+	[LOOPBACK_PHYXS]	= "PHY(XS)",
+	[LOOPBACK_PCS]	 	= "PHY(PCS)",
+	[LOOPBACK_PMAPMD]	= "PHY(PMAPMD)",
+	[LOOPBACK_NETWORK]	= "NETWORK",
+};
+
 static int efx_ethtool_set_tx_csum(struct net_device *net_dev, u32 enable);
 
 struct ethtool_string {
@@ -217,23 +231,179 @@ static void efx_ethtool_get_drvinfo(struct net_device *net_dev,
 	strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info));
 }
 
+/**
+ * efx_fill_test - fill in an individual self-test entry
+ * @test_index:		Index of the test
+ * @strings:		Ethtool strings, or %NULL
+ * @data:		Ethtool test results, or %NULL
+ * @test:		Pointer to test result (used only if data != %NULL)
+ * @unit_format:	Unit name format (e.g. "channel\%d")
+ * @unit_id:		Unit id (e.g. 0 for "channel0")
+ * @test_format:	Test name format (e.g. "loopback.\%s.tx_sent")
+ * @test_id:		Test id (e.g. "PHY" for "loopback.PHY.tx_sent")
+ *
+ * Fill in an individual self-test entry.
+ */
+static void efx_fill_test(unsigned int test_index,
+			  struct ethtool_string *strings, u64 *data,
+			  int *test, const char *unit_format, int unit_id,
+			  const char *test_format, const char *test_id)
+{
+	struct ethtool_string unit_str, test_str;
+
+	/* Fill data value, if applicable */
+	if (data)
+		data[test_index] = *test;
+
+	/* Fill string, if applicable */
+	if (strings) {
+		snprintf(unit_str.name, sizeof(unit_str.name),
+			 unit_format, unit_id);
+		snprintf(test_str.name, sizeof(test_str.name),
+			 test_format, test_id);
+		snprintf(strings[test_index].name,
+			 sizeof(strings[test_index].name),
+			 "%-9s%-17s", unit_str.name, test_str.name);
+	}
+}
+
+#define EFX_PORT_NAME "port%d", 0
+#define EFX_CHANNEL_NAME(_channel) "channel%d", _channel->channel
+#define EFX_TX_QUEUE_NAME(_tx_queue) "txq%d", _tx_queue->queue
+#define EFX_RX_QUEUE_NAME(_rx_queue) "rxq%d", _rx_queue->queue
+#define EFX_LOOPBACK_NAME(_mode, _counter)			\
+	"loopback.%s." _counter, LOOPBACK_MODE_NAME(mode)
+
+/**
+ * efx_fill_loopback_test - fill in a block of loopback self-test entries
+ * @efx:		Efx NIC
+ * @lb_tests:		Efx loopback self-test results structure
+ * @mode:		Loopback test mode
+ * @test_index:		Starting index of the test
+ * @strings:		Ethtool strings, or %NULL
+ * @data:		Ethtool test results, or %NULL
+ */
+static int efx_fill_loopback_test(struct efx_nic *efx,
+				  struct efx_loopback_self_tests *lb_tests,
+				  enum efx_loopback_mode mode,
+				  unsigned int test_index,
+				  struct ethtool_string *strings, u64 *data)
+{
+	struct efx_tx_queue *tx_queue;
+
+	efx_for_each_tx_queue(tx_queue, efx) {
+		efx_fill_test(test_index++, strings, data,
+			      &lb_tests->tx_sent[tx_queue->queue],
+			      EFX_TX_QUEUE_NAME(tx_queue),
+			      EFX_LOOPBACK_NAME(mode, "tx_sent"));
+		efx_fill_test(test_index++, strings, data,
+			      &lb_tests->tx_done[tx_queue->queue],
+			      EFX_TX_QUEUE_NAME(tx_queue),
+			      EFX_LOOPBACK_NAME(mode, "tx_done"));
+	}
+	efx_fill_test(test_index++, strings, data,
+		      &lb_tests->rx_good,
+		      EFX_PORT_NAME,
+		      EFX_LOOPBACK_NAME(mode, "rx_good"));
+	efx_fill_test(test_index++, strings, data,
+		      &lb_tests->rx_bad,
+		      EFX_PORT_NAME,
+		      EFX_LOOPBACK_NAME(mode, "rx_bad"));
+
+	return test_index;
+}
+
+/**
+ * efx_ethtool_fill_self_tests - get self-test details
+ * @efx:		Efx NIC
+ * @tests:		Efx self-test results structure, or %NULL
+ * @strings:		Ethtool strings, or %NULL
+ * @data:		Ethtool test results, or %NULL
+ */
+static int efx_ethtool_fill_self_tests(struct efx_nic *efx,
+				       struct efx_self_tests *tests,
+				       struct ethtool_string *strings,
+				       u64 *data)
+{
+	struct efx_channel *channel;
+	unsigned int n = 0;
+	enum efx_loopback_mode mode;
+
+	/* Interrupt */
+	efx_fill_test(n++, strings, data, &tests->interrupt,
+		      "core", 0, "interrupt", NULL);
+
+	/* Event queues */
+	efx_for_each_channel(channel, efx) {
+		efx_fill_test(n++, strings, data,
+			      &tests->eventq_dma[channel->channel],
+			      EFX_CHANNEL_NAME(channel),
+			      "eventq.dma", NULL);
+		efx_fill_test(n++, strings, data,
+			      &tests->eventq_int[channel->channel],
+			      EFX_CHANNEL_NAME(channel),
+			      "eventq.int", NULL);
+		efx_fill_test(n++, strings, data,
+			      &tests->eventq_poll[channel->channel],
+			      EFX_CHANNEL_NAME(channel),
+			      "eventq.poll", NULL);
+	}
+
+	/* PHY presence */
+	efx_fill_test(n++, strings, data, &tests->phy_ok,
+		      EFX_PORT_NAME, "phy_ok", NULL);
+
+	/* Loopback tests */
+	efx_fill_test(n++, strings, data, &tests->loopback_speed,
+		      EFX_PORT_NAME, "loopback.speed", NULL);
+	efx_fill_test(n++, strings, data, &tests->loopback_full_duplex,
+		      EFX_PORT_NAME, "loopback.full_duplex", NULL);
+	for (mode = LOOPBACK_NONE; mode < LOOPBACK_TEST_MAX; mode++) {
+		if (!(efx->loopback_modes & (1 << mode)))
+			continue;
+		n = efx_fill_loopback_test(efx,
+					   &tests->loopback[mode], mode, n,
+					   strings, data);
+	}
+
+	return n;
+}
+
 static int efx_ethtool_get_stats_count(struct net_device *net_dev)
 {
 	return EFX_ETHTOOL_NUM_STATS;
 }
 
+static int efx_ethtool_self_test_count(struct net_device *net_dev)
+{
+	struct efx_nic *efx = net_dev->priv;
+
+	return efx_ethtool_fill_self_tests(efx, NULL, NULL, NULL);
+}
+
 static void efx_ethtool_get_strings(struct net_device *net_dev,
 				    u32 string_set, u8 *strings)
 {
+	struct efx_nic *efx = net_dev->priv;
 	struct ethtool_string *ethtool_strings =
 		(struct ethtool_string *)strings;
 	int i;
 
-	if (string_set == ETH_SS_STATS)
+	switch (string_set) {
+	case ETH_SS_STATS:
 		for (i = 0; i < EFX_ETHTOOL_NUM_STATS; i++)
 			strncpy(ethtool_strings[i].name,
 				efx_ethtool_stats[i].name,
 				sizeof(ethtool_strings[i].name));
+		break;
+	case ETH_SS_TEST:
+		efx_ethtool_fill_self_tests(efx, NULL,
+					    ethtool_strings, NULL);
+		break;
+	default:
+		/* No other string sets */
+		break;
+	}
 }
 
 static void efx_ethtool_get_stats(struct net_device *net_dev,
@@ -330,6 +500,64 @@ static u32 efx_ethtool_get_rx_csum(struct net_device *net_dev)
 	return efx->rx_checksum_enabled;
 }
 
+static void efx_ethtool_self_test(struct net_device *net_dev,
+				  struct ethtool_test *test, u64 *data)
+{
+	struct efx_nic *efx = net_dev->priv;
+	struct efx_self_tests efx_tests;
+	int offline, already_up;
+	int rc;
+
+	ASSERT_RTNL();
+	if (efx->state != STATE_RUNNING) {
+		rc = -EIO;
+		goto fail1;
+	}
+
+	/* We need rx buffers and interrupts. */
+	already_up = (efx->net_dev->flags & IFF_UP);
+	if (!already_up) {
+		rc = dev_open(efx->net_dev);
+		if (rc) {
+			EFX_ERR(efx, "failed opening device.\n");
+			goto fail2;
+		}
+	}
+
+	memset(&efx_tests, 0, sizeof(efx_tests));
+	offline = (test->flags & ETH_TEST_FL_OFFLINE);
+
+	/* Perform online self tests first */
+	rc = efx_online_test(efx, &efx_tests);
+	if (rc)
+		goto out;
+
+	/* Perform offline tests only if online tests passed */
+	if (offline) {
+		/* Stop the kernel from sending packets during the test. */
+		efx_stop_queue(efx);
+		rc = efx_flush_queues(efx);
+		if (!rc)
+			rc = efx_offline_test(efx, &efx_tests,
+					      efx->loopback_modes);
+		efx_wake_queue(efx);
+	}
+
+ out:
+	if (!already_up)
+		dev_close(efx->net_dev);
+
+	EFX_LOG(efx, "%s all %sline self-tests\n",
+		rc == 0 ? "passed" : "failed", offline ? "off" : "on");
+
+ fail2:
+ fail1:
+	/* Fill ethtool results structures */
+	efx_ethtool_fill_self_tests(efx, &efx_tests, NULL, data);
+	if (rc)
+		test->flags |= ETH_TEST_FL_FAILED;
+}
+
 /* Restart autonegotiation */
 static int efx_ethtool_nway_reset(struct net_device *net_dev)
 {
@@ -480,6 +708,8 @@ struct ethtool_ops efx_ethtool_ops = {
 	.set_tso		= efx_ethtool_set_tso,
 	.get_flags		= ethtool_op_get_flags,
 	.set_flags		= ethtool_op_set_flags,
+	.self_test_count	= efx_ethtool_self_test_count,
+	.self_test		= efx_ethtool_self_test,
 	.get_strings		= efx_ethtool_get_strings,
 	.phys_id		= efx_ethtool_phys_id,
 	.get_stats_count	= efx_ethtool_get_stats_count,
diff --git a/drivers/net/sfc/falcon.c b/drivers/net/sfc/falcon.c
index 247629c..b57cc68 100644
--- a/drivers/net/sfc/falcon.c
+++ b/drivers/net/sfc/falcon.c
@@ -1732,7 +1732,8 @@ void falcon_drain_tx_fifo(struct efx_nic *efx)
 	efx_oword_t temp;
 	int count;
 
-	if (FALCON_REV(efx) < FALCON_REV_B0)
+	if ((FALCON_REV(efx) < FALCON_REV_B0) ||
+	    (efx->loopback_mode != LOOPBACK_NONE))
 		return;
 
 	falcon_read(efx, &temp, MAC0_CTRL_REG_KER);
@@ -2092,6 +2093,8 @@ static int falcon_probe_phy(struct efx_nic *efx)
 			efx->phy_type);
 		return -1;
 	}
+
+	efx->loopback_modes = LOOPBACKS_10G_INTERNAL | efx->phy_op->loopbacks;
 	return 0;
 }
 
diff --git a/drivers/net/sfc/falcon_hwdefs.h b/drivers/net/sfc/falcon_hwdefs.h
index 0485a63..06e2d68 100644
--- a/drivers/net/sfc/falcon_hwdefs.h
+++ b/drivers/net/sfc/falcon_hwdefs.h
@@ -636,6 +636,14 @@
 #define XX_HIDRVA_WIDTH 1
 #define XX_LODRVA_LBN 8
 #define XX_LODRVA_WIDTH 1
+#define XX_LPBKD_LBN 3
+#define XX_LPBKD_WIDTH 1
+#define XX_LPBKC_LBN 2
+#define XX_LPBKC_WIDTH 1
+#define XX_LPBKB_LBN 1
+#define XX_LPBKB_WIDTH 1
+#define XX_LPBKA_LBN 0
+#define XX_LPBKA_WIDTH 1
 
 #define XX_TXDRV_CTL_REG_MAC 0x12
 #define XX_DEQD_LBN 28
@@ -656,8 +664,14 @@
 #define XX_DTXA_WIDTH 4
 
 /* XAUI XGXS core status register */
-#define XX_FORCE_SIG_DECODE_FORCED 0xff
 #define XX_CORE_STAT_REG_MAC 0x16
+#define XX_FORCE_SIG_LBN 24
+#define XX_FORCE_SIG_WIDTH 8
+#define XX_FORCE_SIG_DECODE_FORCED 0xff
+#define XX_XGXS_LB_EN_LBN 23
+#define XX_XGXS_LB_EN_WIDTH 1
+#define XX_XGMII_LB_EN_LBN 22
+#define XX_XGMII_LB_EN_WIDTH 1
 #define XX_ALIGN_DONE_LBN 20
 #define XX_ALIGN_DONE_WIDTH 1
 #define XX_SYNC_STAT_LBN 16
diff --git a/drivers/net/sfc/falcon_xmac.c b/drivers/net/sfc/falcon_xmac.c
index b875c7b..a74b793 100644
--- a/drivers/net/sfc/falcon_xmac.c
+++ b/drivers/net/sfc/falcon_xmac.c
@@ -241,7 +241,7 @@ static void falcon_mask_status_intr(struct efx_nic *efx, int enable)
 {
 	efx_dword_t reg;
 
-	if (FALCON_REV(efx) < FALCON_REV_B0)
+	if ((FALCON_REV(efx) < FALCON_REV_B0) || LOOPBACK_INTERNAL(efx))
 		return;
 
 	/* Flush the ISR */
@@ -288,6 +288,9 @@ int falcon_xaui_link_ok(struct efx_nic *efx)
 	efx_dword_t reg;
 	int align_done, sync_status, link_ok = 0;
 
+	if (LOOPBACK_INTERNAL(efx))
+		return 1;
+
 	/* Read link status */
 	falcon_xmac_readl(efx, &reg, XX_CORE_STAT_REG_MAC);
 
@@ -378,6 +381,61 @@ static void falcon_reconfigure_xmac_core(struct efx_nic *efx)
 	falcon_xmac_writel(efx, &reg, XM_ADR_HI_REG_MAC);
 }
 
+static void falcon_reconfigure_xgxs_core(struct efx_nic *efx)
+{
+	efx_dword_t reg;
+	int xgxs_loopback = (efx->loopback_mode == LOOPBACK_XGXS) ? 1 : 0;
+	int xaui_loopback = (efx->loopback_mode == LOOPBACK_XAUI) ? 1 : 0;
+	int xgmii_loopback =
+		(efx->loopback_mode == LOOPBACK_XGMII) ? 1 : 0;
+
+	/* XGXS block is flaky and will need to be reset if moving
+	 * into or out of XGMII, XGXS or XAUI loopbacks. */
+	if (EFX_WORKAROUND_5147(efx)) {
+		int old_xgmii_loopback, old_xgxs_loopback, old_xaui_loopback;
+		int reset_xgxs;
+
+		falcon_xmac_readl(efx, &reg, XX_CORE_STAT_REG_MAC);
+		old_xgxs_loopback = EFX_DWORD_FIELD(reg, XX_XGXS_LB_EN);
+		old_xgmii_loopback = EFX_DWORD_FIELD(reg, XX_XGMII_LB_EN);
+
+		falcon_xmac_readl(efx, &reg, XX_SD_CTL_REG_MAC);
+		old_xaui_loopback = EFX_DWORD_FIELD(reg, XX_LPBKA);
+
+		/* The PHY driver may have turned XAUI off */
+		reset_xgxs = ((xgxs_loopback != old_xgxs_loopback) ||
+			      (xaui_loopback != old_xaui_loopback) ||
+			      (xgmii_loopback != old_xgmii_loopback));
+		if (reset_xgxs) {
+			falcon_xmac_readl(efx, &reg, XX_PWR_RST_REG_MAC);
+			EFX_SET_DWORD_FIELD(reg, XX_RSTXGXSTX_EN, 1);
+			EFX_SET_DWORD_FIELD(reg, XX_RSTXGXSRX_EN, 1);
+			falcon_xmac_writel(efx, &reg, XX_PWR_RST_REG_MAC);
+			udelay(1);
+			EFX_SET_DWORD_FIELD(reg, XX_RSTXGXSTX_EN, 0);
+			EFX_SET_DWORD_FIELD(reg, XX_RSTXGXSRX_EN, 0);
+			falcon_xmac_writel(efx, &reg, XX_PWR_RST_REG_MAC);
+			udelay(1);
+		}
+	}
+
+	falcon_xmac_readl(efx, &reg, XX_CORE_STAT_REG_MAC);
+	EFX_SET_DWORD_FIELD(reg, XX_FORCE_SIG,
+			    (xgxs_loopback || xaui_loopback) ?
+			    XX_FORCE_SIG_DECODE_FORCED : 0);
+	EFX_SET_DWORD_FIELD(reg, XX_XGXS_LB_EN, xgxs_loopback);
+	EFX_SET_DWORD_FIELD(reg, XX_XGMII_LB_EN, xgmii_loopback);
+	falcon_xmac_writel(efx, &reg, XX_CORE_STAT_REG_MAC);
+
+	falcon_xmac_readl(efx, &reg, XX_SD_CTL_REG_MAC);
+	EFX_SET_DWORD_FIELD(reg, XX_LPBKD, xaui_loopback);
+	EFX_SET_DWORD_FIELD(reg, XX_LPBKC, xaui_loopback);
+	EFX_SET_DWORD_FIELD(reg, XX_LPBKB, xaui_loopback);
+	EFX_SET_DWORD_FIELD(reg, XX_LPBKA, xaui_loopback);
+	falcon_xmac_writel(efx, &reg, XX_SD_CTL_REG_MAC);
+}
+
+
 /* Try and bring the Falcon side of the Falcon-Phy XAUI link fails
  * to come back up. Bash it until it comes back up */
 static int falcon_check_xaui_link_up(struct efx_nic *efx)
@@ -386,7 +444,8 @@ static int falcon_check_xaui_link_up(struct efx_nic *efx)
 	tries = EFX_WORKAROUND_5147(efx) ? 5 : 1;
 	max_tries = tries;
 
-	if (efx->phy_type == PHY_TYPE_NONE)
+	if ((efx->loopback_mode == LOOPBACK_NETWORK) ||
+	    (efx->phy_type == PHY_TYPE_NONE))
 		return 0;
 
 	while (tries) {
@@ -412,8 +471,13 @@ void falcon_reconfigure_xmac(struct efx_nic *efx)
 	falcon_mask_status_intr(efx, 0);
 
 	falcon_deconfigure_mac_wrapper(efx);
+
+	efx->tx_disabled = LOOPBACK_INTERNAL(efx);
 	efx->phy_op->reconfigure(efx);
+
+	falcon_reconfigure_xgxs_core(efx);
 	falcon_reconfigure_xmac_core(efx);
+
 	falcon_reconfigure_mac_wrapper(efx);
 
 	/* Ensure XAUI link is up */
@@ -500,6 +564,10 @@ int falcon_check_xmac(struct efx_nic *efx)
 	unsigned xaui_link_ok;
 	int rc;
 
+	if ((efx->loopback_mode == LOOPBACK_NETWORK) ||
+	    (efx->phy_type == PHY_TYPE_NONE))
+		return 0;
+
 	falcon_mask_status_intr(efx, 0);
 	xaui_link_ok = falcon_xaui_link_ok(efx);
 
diff --git a/drivers/net/sfc/mdio_10g.c b/drivers/net/sfc/mdio_10g.c
index dc06bb0..c4f540e 100644
--- a/drivers/net/sfc/mdio_10g.c
+++ b/drivers/net/sfc/mdio_10g.c
@@ -44,6 +44,9 @@ static int mdio_clause45_check_mmd(struct efx_nic *efx, int mmd,
 	int status;
 	int phy_id = efx->mii.phy_id;
 
+	if (LOOPBACK_INTERNAL(efx))
+		return 0;
+
 	/* Read MMD STATUS2 to check it is responding. */
 	status = mdio_clause45_read(efx, phy_id, mmd, MDIO_MMDREG_STAT2);
 	if (((status >> MDIO_MMDREG_STAT2_PRESENT_LBN) &
@@ -164,6 +167,22 @@ int mdio_clause45_links_ok(struct efx_nic *efx, unsigned int mmd_mask)
 	int mmd = 0;
 	int good;
 
+	/* If the port is in loopback, then we should only consider a subset
+	 * of mmd's */
+	if (LOOPBACK_INTERNAL(efx))
+		return 1;
+	else if (efx->loopback_mode == LOOPBACK_NETWORK)
+		return 0;
+	else if (efx->loopback_mode == LOOPBACK_PHYXS)
+		mmd_mask &= ~(MDIO_MMDREG_DEVS0_PHYXS |
+			      MDIO_MMDREG_DEVS0_PCS |
+			      MDIO_MMDREG_DEVS0_PMAPMD);
+	else if (efx->loopback_mode == LOOPBACK_PCS)
+		mmd_mask &= ~(MDIO_MMDREG_DEVS0_PCS |
+			      MDIO_MMDREG_DEVS0_PMAPMD);
+	else if (efx->loopback_mode == LOOPBACK_PMAPMD)
+		mmd_mask &= ~MDIO_MMDREG_DEVS0_PMAPMD;
+
 	while (mmd_mask) {
 		if (mmd_mask & 1) {
 			/* Double reads because link state is latched, and a
@@ -182,6 +201,65 @@ int mdio_clause45_links_ok(struct efx_nic *efx, unsigned int mmd_mask)
 	return ok;
 }
 
+/* Drive the PMA/PMD global TX-disable bit from efx->tx_disabled */
+void mdio_clause45_transmit_disable(struct efx_nic *efx)
+{
+	int ctrl1, ctrl2;
+
+	ctrl1 = ctrl2 = mdio_clause45_read(efx, efx->mii.phy_id,
+					   MDIO_MMD_PMAPMD, MDIO_MMDREG_TXDIS);
+	if (efx->tx_disabled)
+		ctrl2 |= (1 << MDIO_MMDREG_TXDIS_GLOBAL_LBN);
+	else
+		ctrl2 &= ~(1 << MDIO_MMDREG_TXDIS_GLOBAL_LBN);
+	if (ctrl1 != ctrl2)
+		mdio_clause45_write(efx, efx->mii.phy_id, MDIO_MMD_PMAPMD,
+				    MDIO_MMDREG_TXDIS, ctrl2);
+}
+
+void mdio_clause45_phy_reconfigure(struct efx_nic *efx)
+{
+	int phy_id = efx->mii.phy_id;
+	int ctrl1, ctrl2;
+
+	/* Handle (with debouncing) PMA/PMD loopback */
+	ctrl1 = ctrl2 = mdio_clause45_read(efx, phy_id, MDIO_MMD_PMAPMD,
+					   MDIO_MMDREG_CTRL1);
+
+	if (efx->loopback_mode == LOOPBACK_PMAPMD)
+		ctrl2 |= (1 << MDIO_PMAPMD_CTRL1_LBACK_LBN);
+	else
+		ctrl2 &= ~(1 << MDIO_PMAPMD_CTRL1_LBACK_LBN);
+
+	if (ctrl1 != ctrl2)
+		mdio_clause45_write(efx, phy_id, MDIO_MMD_PMAPMD,
+				    MDIO_MMDREG_CTRL1, ctrl2);
+
+	/* Handle (with debouncing) PCS loopback */
+	ctrl1 = ctrl2 = mdio_clause45_read(efx, phy_id, MDIO_MMD_PCS,
+					   MDIO_MMDREG_CTRL1);
+	if (efx->loopback_mode == LOOPBACK_PCS)
+		ctrl2 |= (1 << MDIO_MMDREG_CTRL1_LBACK_LBN);
+	else
+		ctrl2 &= ~(1 << MDIO_MMDREG_CTRL1_LBACK_LBN);
+
+	if (ctrl1 != ctrl2)
+		mdio_clause45_write(efx, phy_id, MDIO_MMD_PCS,
+				    MDIO_MMDREG_CTRL1, ctrl2);
+
+	/* Handle (with debouncing) PHYXS network loopback */
+	ctrl1 = ctrl2 = mdio_clause45_read(efx, phy_id, MDIO_MMD_PHYXS,
+					   MDIO_MMDREG_CTRL1);
+	if (efx->loopback_mode == LOOPBACK_NETWORK)
+		ctrl2 |= (1 << MDIO_MMDREG_CTRL1_LBACK_LBN);
+	else
+		ctrl2 &= ~(1 << MDIO_MMDREG_CTRL1_LBACK_LBN);
+
+	if (ctrl1 != ctrl2)
+		mdio_clause45_write(efx, phy_id, MDIO_MMD_PHYXS,
+				    MDIO_MMDREG_CTRL1, ctrl2);
+}
+
 /**
  * mdio_clause45_get_settings - Read (some of) the PHY settings over MDIO.
  * @efx:		Efx NIC
diff --git a/drivers/net/sfc/mdio_10g.h b/drivers/net/sfc/mdio_10g.h
index 338c62c..cb99f3f 100644
--- a/drivers/net/sfc/mdio_10g.h
+++ b/drivers/net/sfc/mdio_10g.h
@@ -44,11 +44,16 @@
 #define MDIO_MMDREG_DEVS1	(6)
 #define MDIO_MMDREG_CTRL2	(7)
 #define MDIO_MMDREG_STAT2	(8)
+#define MDIO_MMDREG_TXDIS	(9)
 
 /* Bits in MMDREG_CTRL1 */
 /* Reset */
 #define MDIO_MMDREG_CTRL1_RESET_LBN	(15)
 #define MDIO_MMDREG_CTRL1_RESET_WIDTH	(1)
+/* Loopback */
+/* Loopback bit for WIS, PCS, PHYXS and DTEXS */
+#define MDIO_MMDREG_CTRL1_LBACK_LBN	(14)
+#define MDIO_MMDREG_CTRL1_LBACK_WIDTH	(1)
 
 /* Bits in MMDREG_STAT1 */
 #define MDIO_MMDREG_STAT1_FAULT_LBN	(7)
@@ -56,6 +61,9 @@
 /* Link state */
 #define MDIO_MMDREG_STAT1_LINK_LBN	(2)
 #define MDIO_MMDREG_STAT1_LINK_WIDTH	(1)
+/* Low power ability */
+#define MDIO_MMDREG_STAT1_LPABLE_LBN	(1)
+#define MDIO_MMDREG_STAT1_LPABLE_WIDTH	(1)
 
 /* Bits in ID reg */
 #define MDIO_ID_REV(_id32)	(_id32 & 0xf)
@@ -76,6 +84,14 @@
 #define MDIO_MMDREG_STAT2_PRESENT_LBN	(14)
 #define MDIO_MMDREG_STAT2_PRESENT_WIDTH (2)
 
+/* Bits in MMDREG_TXDIS */
+#define MDIO_MMDREG_TXDIS_GLOBAL_LBN    (0)
+#define MDIO_MMDREG_TXDIS_GLOBAL_WIDTH  (1)
+
+/* MMD-specific bits, ordered by MMD, then register */
+#define MDIO_PMAPMD_CTRL1_LBACK_LBN	(0)
+#define MDIO_PMAPMD_CTRL1_LBACK_WIDTH	(1)
+
 /* PMA type (4 bits) */
 #define MDIO_PMAPMD_CTRL2_10G_CX4	(0x0)
 #define MDIO_PMAPMD_CTRL2_10G_EW	(0x1)
@@ -217,6 +233,12 @@ int mdio_clause45_check_mmds(struct efx_nic *efx,
 extern int mdio_clause45_links_ok(struct efx_nic *efx,
 				  unsigned int mmd_mask);
 
+/* Generic transmit disable support through PMAPMD */
+extern void mdio_clause45_transmit_disable(struct efx_nic *efx);
+
+/* Generic part of reconfigure: set/clear loopback bits */
+extern void mdio_clause45_phy_reconfigure(struct efx_nic *efx);
+
 /* Read (some of) the PHY settings over MDIO */
 extern void mdio_clause45_get_settings(struct efx_nic *efx,
 				       struct ethtool_cmd *ecmd);
diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h
index 9c285fb..59f261b 100644
--- a/drivers/net/sfc/net_driver.h
+++ b/drivers/net/sfc/net_driver.h
@@ -448,6 +448,9 @@ struct efx_board {
 	struct efx_blinker blinker;
 };
 
+#define STRING_TABLE_LOOKUP(val, member)	\
+	member ## _names[val]
+
 enum efx_int_mode {
 	/* Be careful if altering to correct macro below */
 	EFX_INT_MODE_MSIX = 0,
@@ -520,6 +523,7 @@ enum efx_fc_type {
  * @check_hw: Check hardware
  * @reset_xaui: Reset XAUI side of PHY for (software sequenced reset)
  * @mmds: MMD presence mask
+ * @loopbacks: Supported loopback modes mask
  */
 struct efx_phy_operations {
 	int (*init) (struct efx_nic *efx);
@@ -529,6 +533,7 @@ struct efx_phy_operations {
 	int (*check_hw) (struct efx_nic *efx);
 	void (*reset_xaui) (struct efx_nic *efx);
 	int mmds;
+	unsigned loopbacks;
 };
 
 /*
@@ -667,6 +672,7 @@ union efx_multicast_hash {
  * @phy_op: PHY interface
  * @phy_data: PHY private data (including PHY-specific stats)
  * @mii: PHY interface
+ * @tx_disabled: PHY transmitter turned off
  * @link_up: Link status
  * @link_options: Link options (MII/GMII format)
  * @n_link_state_changes: Number of times the link has changed state
@@ -674,6 +680,9 @@ union efx_multicast_hash {
  * @multicast_hash: Multicast hash table
  * @flow_control: Flow control flags - separate RX/TX so can't use link_options
  * @reconfigure_work: work item for dealing with PHY events
+ * @loopback_mode: Loopback status
+ * @loopback_modes: Supported loopback mode bitmask
+ * @loopback_selftest: Offline self-test private state
  *
  * The @priv field of the corresponding &struct net_device points to
  * this.
@@ -733,6 +742,7 @@ struct efx_nic {
 	struct efx_phy_operations *phy_op;
 	void *phy_data;
 	struct mii_if_info mii;
+	unsigned tx_disabled;
 
 	int link_up;
 	unsigned int link_options;
@@ -744,6 +754,10 @@ struct efx_nic {
 	struct work_struct reconfigure_work;
 
 	atomic_t rx_reset;
+	enum efx_loopback_mode loopback_mode;
+	unsigned int loopback_modes;
+
+	void *loopback_selftest;
 };
 
 /**
diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c
index 9fd1984..6706223 100644
--- a/drivers/net/sfc/rx.c
+++ b/drivers/net/sfc/rx.c
@@ -19,6 +19,7 @@
 #include "rx.h"
 #include "efx.h"
 #include "falcon.h"
+#include "selftest.h"
 #include "workarounds.h"
 
 /* Number of RX descriptors pushed at once. */
@@ -683,6 +684,15 @@ void __efx_rx_packet(struct efx_channel *channel,
 	struct sk_buff *skb;
 	int lro = efx->net_dev->features & NETIF_F_LRO;
 
+	/* If we're in loopback test, then pass the packet directly to the
+	 * loopback layer, and free the rx_buf here
+	 */
+	if (unlikely(efx->loopback_selftest)) {
+		efx_loopback_rx_packet(efx, rx_buf->data, rx_buf->len);
+		efx_free_rx_buffer(efx, rx_buf);
+		goto done;
+	}
+
 	if (rx_buf->skb) {
 		prefetch(skb_shinfo(rx_buf->skb));
 
diff --git a/drivers/net/sfc/selftest.c b/drivers/net/sfc/selftest.c
new file mode 100644
index 0000000..cbda159
--- /dev/null
+++ b/drivers/net/sfc/selftest.c
@@ -0,0 +1,717 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/kernel_stat.h>
+#include <linux/pci.h>
+#include <linux/ethtool.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/rtnetlink.h>
+#include <asm/io.h>
+#include "net_driver.h"
+#include "ethtool.h"
+#include "efx.h"
+#include "falcon.h"
+#include "selftest.h"
+#include "boards.h"
+#include "workarounds.h"
+#include "mac.h"
+
+/*
+ * Loopback test packet structure
+ *
+ * The self-test should stress every RSS vector, and unfortunately
+ * Falcon only performs RSS on TCP/UDP packets.
+ */
+struct efx_loopback_payload {
+	struct ethhdr header;
+	struct iphdr ip;
+	struct udphdr udp;
+	__be16 iteration;
+	const char msg[64];
+} __attribute__ ((packed));
+
+/* Loopback test source MAC address */
+static const unsigned char payload_source[ETH_ALEN] = {
+	0x00, 0x0f, 0x53, 0x1b, 0x1b, 0x1b,
+};
+
+static const char *payload_msg =
+	"Hello world! This is an Efx loopback test in progress!";
+
+/**
+ * efx_selftest_state - persistent state during a selftest
+ * @flush:		Drop all packets in efx_loopback_rx_packet
+ * @packet_count:	Number of packets being used in this test
+ * @skbs:		An array of skbs transmitted
+ * @rx_good:		RX good packet count
+ * @rx_bad:		RX bad packet count
+ * @payload:		Payload used in tests
+ */
+struct efx_selftest_state {
+	int flush;
+	int packet_count;
+	struct sk_buff **skbs;
+	atomic_t rx_good;
+	atomic_t rx_bad;
+	struct efx_loopback_payload payload;
+};
+
+/**************************************************************************
+ *
+ * Configurable values
+ *
+ **************************************************************************/
+
+/* Level of loopback testing
+ *
+ * The maximum packet burst length is 16**(n-1), i.e.
+ *
+ * - Level 0 : no packets
+ * - Level 1 : 1 packet
+ * - Level 2 : 17 packets (1 * 1 packet, 1 * 16 packets)
+ * - Level 3 : 273 packets (1 * 1 packet, 1 * 16 packet, 1 * 256 packets)
+ *
+ */
+static unsigned int loopback_test_level = 3;
+
+/**************************************************************************
+ *
+ * Interrupt and event queue testing
+ *
+ **************************************************************************/
+
+/* Test generation and receipt of interrupts */
+static int efx_test_interrupts(struct efx_nic *efx,
+			       struct efx_self_tests *tests)
+{
+	struct efx_channel *channel;
+
+	EFX_LOG(efx, "testing interrupts\n");
+	tests->interrupt = -1;
+
+	/* Reset interrupt flag */
+	efx->last_irq_cpu = -1;
+	smp_wmb();
+
+	/* ACK each interrupting event queue. Receiving an interrupt due to
+	 * traffic before a test event is raised is considered a pass */
+	efx_for_each_channel_with_interrupt(channel, efx) {
+		if (channel->work_pending)
+			efx_process_channel_now(channel);
+		if (efx->last_irq_cpu >= 0)
+			goto success;
+	}
+
+	falcon_generate_interrupt(efx);
+
+	/* Wait for arrival of test interrupt. */
+	EFX_LOG(efx, "waiting for test interrupt\n");
+	schedule_timeout_uninterruptible(HZ / 10);
+	if (efx->last_irq_cpu >= 0)
+		goto success;
+
+	EFX_ERR(efx, "timed out waiting for interrupt\n");
+	return -ETIMEDOUT;
+
+ success:
+	EFX_LOG(efx, "test interrupt (mode %d) seen on CPU%d\n",
+		efx->interrupt_mode, efx->last_irq_cpu);
+	tests->interrupt = 1;
+	return 0;
+}
+
+/* Test generation and receipt of non-interrupting events */
+static int efx_test_eventq(struct efx_channel *channel,
+			   struct efx_self_tests *tests)
+{
+	unsigned int magic;
+
+	/* Channel specific code, limited to 20 bits */
+	magic = (0x00010150 + channel->channel);
+	EFX_LOG(channel->efx, "channel %d testing event queue with code %x\n",
+		channel->channel, magic);
+
+	tests->eventq_dma[channel->channel] = -1;
+	tests->eventq_int[channel->channel] = 1;	/* fake pass */
+	tests->eventq_poll[channel->channel] = 1;	/* fake pass */
+
+	/* Reset flag and zero magic word */
+	channel->efx->last_irq_cpu = -1;
+	channel->eventq_magic = 0;
+	smp_wmb();
+
+	falcon_generate_test_event(channel, magic);
+	udelay(1);
+
+	efx_process_channel_now(channel);
+	if (channel->eventq_magic != magic) {
+		EFX_ERR(channel->efx, "channel %d  failed to see test event\n",
+			channel->channel);
+		return -ETIMEDOUT;
+	} else {
+		tests->eventq_dma[channel->channel] = 1;
+	}
+
+	return 0;
+}
+
+/* Test generation and receipt of interrupting events */
+static int efx_test_eventq_irq(struct efx_channel *channel,
+			       struct efx_self_tests *tests)
+{
+	unsigned int magic, count;
+
+	/* Channel specific code, limited to 20 bits */
+	magic = (0x00010150 + channel->channel);
+	EFX_LOG(channel->efx, "channel %d testing event queue with code %x\n",
+		channel->channel, magic);
+
+	tests->eventq_dma[channel->channel] = -1;
+	tests->eventq_int[channel->channel] = -1;
+	tests->eventq_poll[channel->channel] = -1;
+
+	/* Reset flag and zero magic word */
+	channel->efx->last_irq_cpu = -1;
+	channel->eventq_magic = 0;
+	smp_wmb();
+
+	falcon_generate_test_event(channel, magic);
+
+	/* Wait for arrival of interrupt; poll at most twice, HZ/100 apart */
+	count = 0;
+	do {
+		schedule_timeout_uninterruptible(HZ / 100);
+
+		if (channel->work_pending)
+			efx_process_channel_now(channel);
+
+		if (channel->eventq_magic == magic)
+			goto eventq_ok;
+	} while (++count < 2);
+
+	EFX_ERR(channel->efx, "channel %d timed out waiting for event queue\n",
+		channel->channel);
+
+	/* See if interrupt arrived, and report the CPU that recorded it */
+	if (channel->efx->last_irq_cpu >= 0) {
+		EFX_ERR(channel->efx, "channel %d saw interrupt on CPU%d "
+			"during event queue test\n", channel->channel,
+			channel->efx->last_irq_cpu);
+		tests->eventq_int[channel->channel] = 1;
+	}
+
+	/* Check to see if event was received even if interrupt wasn't */
+	efx_process_channel_now(channel);
+	if (channel->eventq_magic == magic) {
+		EFX_ERR(channel->efx, "channel %d event was generated, but "
+			"failed to trigger an interrupt\n", channel->channel);
+		tests->eventq_dma[channel->channel] = 1;
+	}
+
+	return -ETIMEDOUT;
+ eventq_ok:
+	EFX_LOG(channel->efx, "channel %d event queue passed\n",
+		channel->channel);
+	tests->eventq_dma[channel->channel] = 1;
+	tests->eventq_int[channel->channel] = 1;
+	tests->eventq_poll[channel->channel] = 1;
+	return 0;
+}
+
+/**************************************************************************
+ *
+ * PHY testing
+ *
+ **************************************************************************/
+
+/* Check PHY presence by reading the PHY ID registers */
+static int efx_test_phy(struct efx_nic *efx,
+			struct efx_self_tests *tests)
+{
+	u16 physid1, physid2;
+	struct mii_if_info *mii = &efx->mii;
+	struct net_device *net_dev = efx->net_dev;
+
+	if (efx->phy_type == PHY_TYPE_NONE)
+		return 0;
+
+	EFX_LOG(efx, "testing PHY presence\n");
+	tests->phy_ok = -1;
+
+	physid1 = mii->mdio_read(net_dev, mii->phy_id, MII_PHYSID1);
+	physid2 = mii->mdio_read(net_dev, mii->phy_id, MII_PHYSID2);
+
+	if ((physid1 != 0x0000) && (physid1 != 0xffff) &&
+	    (physid2 != 0x0000) && (physid2 != 0xffff)) {
+		EFX_LOG(efx, "found MII PHY %d ID 0x%x:%x\n",
+			mii->phy_id, physid1, physid2);
+		tests->phy_ok = 1;
+		return 0;
+	}
+
+	EFX_ERR(efx, "no MII PHY present with ID %d\n", mii->phy_id);
+	return -ENODEV;
+}
+
+/**************************************************************************
+ *
+ * Loopback testing
+ * NB Only one loopback test can be executing concurrently.
+ *
+ **************************************************************************/
+
+/* Loopback test RX callback
+ * This is called for each received packet during loopback testing.
+ */
+void efx_loopback_rx_packet(struct efx_nic *efx,
+			    const char *buf_ptr, int pkt_len)
+{
+	struct efx_selftest_state *state = efx->loopback_selftest;
+	struct efx_loopback_payload *received;
+	struct efx_loopback_payload *payload;
+
+	BUG_ON(!buf_ptr);
+
+	/* If we are just flushing, then drop the packet */
+	if ((state == NULL) || state->flush)
+		return;
+
+	payload = &state->payload;
+	received = (struct efx_loopback_payload *)(char *) buf_ptr;
+
+	/* Check that header exists */
+	if (pkt_len < sizeof(received->header)) {
+		EFX_ERR(efx, "saw runt RX packet (length %d) in %s loopback "
+			"test\n", pkt_len, LOOPBACK_MODE(efx));
+		goto err;
+	}
+
+	/* Check that the ethernet header matches */
+	if (memcmp(&received->header, &payload->header, ETH_HLEN) != 0) {
+		EFX_ERR(efx, "saw non-loopback RX packet in %s loopback test\n",
+			LOOPBACK_MODE(efx));
+		goto err;
+	}
+
+	/* Check packet length */
+	if (pkt_len != sizeof(*payload)) {
+		EFX_ERR(efx, "saw incorrect RX packet length %d (wanted %d) in "
+			"%s loopback test\n", pkt_len, (int)sizeof(*payload),
+			LOOPBACK_MODE(efx));
+		goto err;
+	}
+
+	/* Check that IP header matches.  saddr varies per packet and check
+	 * is not compared, so copy both across now the length is known */
+	received->ip.saddr = payload->ip.saddr;
+	received->ip.check = payload->ip.check;
+	if (memcmp(&received->ip, &payload->ip, sizeof(payload->ip)) != 0) {
+		EFX_ERR(efx, "saw corrupted IP header in %s loopback test\n",
+			LOOPBACK_MODE(efx));
+		goto err;
+	}
+
+	/* Check that msg and padding matches */
+	if (memcmp(&received->msg, &payload->msg, sizeof(received->msg)) != 0) {
+		EFX_ERR(efx, "saw corrupted RX packet in %s loopback test\n",
+			LOOPBACK_MODE(efx));
+		goto err;
+	}
+
+	/* Check that iteration matches */
+	if (received->iteration != payload->iteration) {
+		EFX_ERR(efx, "saw RX packet from iteration %d (wanted %d) in "
+			"%s loopback test\n", ntohs(received->iteration),
+			ntohs(payload->iteration), LOOPBACK_MODE(efx));
+		goto err;
+	}
+
+	/* Increase correct RX count */
+	EFX_TRACE(efx, "got loopback RX in %s loopback test\n",
+		  LOOPBACK_MODE(efx));
+
+	atomic_inc(&state->rx_good);
+	return;
+
+ err:
+#ifdef EFX_ENABLE_DEBUG
+	if (atomic_read(&state->rx_bad) == 0) {
+		EFX_ERR(efx, "received packet:\n");
+		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1,
+			       buf_ptr, pkt_len, 0);
+		EFX_ERR(efx, "expected packet:\n");
+		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1,
+			       &state->payload, sizeof(state->payload), 0);
+	}
+#endif
+	atomic_inc(&state->rx_bad);
+}
+
+/* Initialise an efx_selftest_state for a new iteration */
+static void efx_iterate_state(struct efx_nic *efx)
+{
+	struct efx_selftest_state *state = efx->loopback_selftest;
+	struct net_device *net_dev = efx->net_dev;
+	struct efx_loopback_payload *payload = &state->payload;
+
+	/* Initialise the layer 2 header: to our own MAC, from the test MAC */
+	memcpy(&payload->header.h_dest, net_dev->dev_addr, ETH_ALEN);
+	memcpy(&payload->header.h_source, &payload_source, ETH_ALEN);
+	payload->header.h_proto = htons(ETH_P_IP);
+
+	/* saddr set later and used as incrementing count */
+	payload->ip.daddr = htonl(INADDR_LOOPBACK);
+	payload->ip.ihl = 5;
+	payload->ip.check = htons(0xdead);
+	payload->ip.tot_len = htons(sizeof(*payload) - sizeof(struct ethhdr));
+	payload->ip.version = IPVERSION;
+	payload->ip.protocol = IPPROTO_UDP;
+
+	/* Initialise udp header */
+	payload->udp.source = 0;
+	payload->udp.len = htons(sizeof(*payload) - sizeof(struct ethhdr) -
+				 sizeof(struct iphdr));
+	payload->udp.check = 0;	/* checksum ignored */
+
+	/* Fill out payload; payload_msg is a pointer, so sizeof() is wrong */
+	payload->iteration = htons(ntohs(payload->iteration) + 1);
+	memcpy(&payload->msg, payload_msg, strlen(payload_msg) + 1);
+
+	/* Fill out remaining state members */
+	atomic_set(&state->rx_good, 0);
+	atomic_set(&state->rx_bad, 0);
+	smp_wmb();
+}
+
+static int efx_tx_loopback(struct efx_tx_queue *tx_queue)
+{
+	struct efx_nic *efx = tx_queue->efx;
+	struct efx_selftest_state *state = efx->loopback_selftest;
+	struct efx_loopback_payload *payload;
+	struct sk_buff *skb;
+	int i, rc;
+
+	/* Transmit N copies of buffer */
+	for (i = 0; i < state->packet_count; i++) {
+		/* Allocate an skb, holding an extra reference for 
+		 * transmit completion counting */
+		skb = alloc_skb(sizeof(state->payload), GFP_KERNEL);
+		if (!skb)
+			return -ENOMEM;
+		state->skbs[i] = skb;
+		skb_get(skb);
+
+		/* Copy the payload in, incrementing the source address to
+		 * exercise the rss vectors */
+		payload = ((struct efx_loopback_payload *)
+			   skb_put(skb, sizeof(state->payload)));
+		memcpy(payload, &state->payload, sizeof(state->payload));
+		payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2));
+
+		/* Ensure everything we've written is visible to the
+		 * interrupt handler. */
+		smp_wmb();
+
+		if (NET_DEV_REGISTERED(efx))
+			netif_tx_lock_bh(efx->net_dev);
+		rc = efx_xmit(efx, tx_queue, skb);
+		if (NET_DEV_REGISTERED(efx))
+			netif_tx_unlock_bh(efx->net_dev);
+
+		if (rc != NETDEV_TX_OK) {
+			EFX_ERR(efx, "TX queue %d could not transmit packet %d "
+				"of %d in %s loopback test\n", tx_queue->queue,
+				i + 1, state->packet_count, LOOPBACK_MODE(efx));
+
+			/* Defer cleaning up the other skbs for the caller */
+			kfree_skb(skb);
+			return -EPIPE;
+		}
+	}
+
+	return 0;
+}
+
+static int efx_rx_loopback(struct efx_tx_queue *tx_queue,
+			   struct efx_loopback_self_tests *lb_tests)
+{
+	struct efx_nic *efx = tx_queue->efx;
+	struct efx_selftest_state *state = efx->loopback_selftest;
+	struct sk_buff *skb;
+	int tx_done = 0, rx_good, rx_bad;
+	int i, rc = 0;
+
+	if (NET_DEV_REGISTERED(efx))
+		netif_tx_lock_bh(efx->net_dev);
+
+	/* Count the number of tx completions, and decrement the refcnt. Any
+	 * skbs not already completed will be free'd when the queue is flushed */
+	for (i=0; i < state->packet_count; i++) {
+		skb = state->skbs[i];
+		if (skb && !skb_shared(skb))
+			++tx_done;
+		dev_kfree_skb_any(skb);
+	}
+
+	if (NET_DEV_REGISTERED(efx))
+		netif_tx_unlock_bh(efx->net_dev);
+
+	/* Check TX completion and received packet counts */
+	rx_good = atomic_read(&state->rx_good);
+	rx_bad = atomic_read(&state->rx_bad);
+	if (tx_done != state->packet_count) {
+		/* Don't free the skbs; they will be picked up on TX
+		 * overflow or channel teardown.
+		 */
+		EFX_ERR(efx, "TX queue %d saw only %d out of an expected %d "
+			"TX completion events in %s loopback test\n",
+			tx_queue->queue, tx_done, state->packet_count,
+			LOOPBACK_MODE(efx));
+		rc = -ETIMEDOUT;
+		/* Allow to fall through so we see the RX errors as well */
+	}
+
+	/* We may always be up to a flush away from our desired packet total */
+	if (rx_good != state->packet_count) {
+		EFX_LOG(efx, "TX queue %d saw only %d out of an expected %d "
+			"received packets in %s loopback test\n",
+			tx_queue->queue, rx_good, state->packet_count,
+			LOOPBACK_MODE(efx));
+		rc = -ETIMEDOUT;
+		/* Fall through */
+	}
+
+	/* Update loopback test structure */
+	lb_tests->tx_sent[tx_queue->queue] += state->packet_count;
+	lb_tests->tx_done[tx_queue->queue] += tx_done;
+	lb_tests->rx_good += rx_good;
+	lb_tests->rx_bad += rx_bad;
+
+	return rc;
+}
+
+static int efx_test_loopback(struct efx_tx_queue *tx_queue,
+			     struct efx_loopback_self_tests *lb_tests)
+{
+	struct efx_nic *efx = tx_queue->efx;
+	struct efx_selftest_state *state = efx->loopback_selftest;
+	struct efx_channel *channel;
+	int i, rc = 0;
+
+	for (i = 0; i < loopback_test_level; i++) {
+		/* Determine how many packets to send */
+		state->packet_count = (efx->type->txd_ring_mask + 1) / 3;
+		state->packet_count = min(1 << (i << 2), state->packet_count);
+		state->skbs = kzalloc(sizeof(state->skbs[0]) *
+				      state->packet_count, GFP_KERNEL);
+		if (!state->skbs)
+			return -ENOMEM;
+		state->flush = 0;
+
+		EFX_LOG(efx, "TX queue %d testing %s loopback with %d "
+			"packets\n", tx_queue->queue, LOOPBACK_MODE(efx),
+			state->packet_count);
+
+		efx_iterate_state(efx);
+		rc = efx_tx_loopback(tx_queue);
+
+		/* NAPI polling is not enabled; process channels synchronously */
+		schedule_timeout_uninterruptible(HZ / 50);
+		efx_for_each_channel_with_interrupt(channel, efx) {
+			if (channel->work_pending)
+				efx_process_channel_now(channel);
+		}
+
+		rc |= efx_rx_loopback(tx_queue, lb_tests);
+		kfree(state->skbs);
+
+		if (rc) {
+			/* Wait for stray packets to drain after a failure */
+			schedule_timeout_uninterruptible(HZ / 10);
+			return rc;
+		}
+	}
+
+	EFX_LOG(efx, "TX queue %d passed %s loopback test with a burst length "
+		"of %d packets\n", tx_queue->queue, LOOPBACK_MODE(efx),
+		state->packet_count);
+
+	return rc;
+}
+
+static int efx_test_loopbacks(struct efx_nic *efx,
+			      struct efx_self_tests *tests,
+			      unsigned int loopback_modes)
+{
+	struct efx_selftest_state *state = efx->loopback_selftest;
+	struct ethtool_cmd ecmd, ecmd_loopback;
+	struct efx_tx_queue *tx_queue;
+	enum efx_loopback_mode old_mode, mode;
+	int count, rc = 0, link_up;
+	
+	rc = efx_ethtool_get_settings(efx->net_dev, &ecmd);
+	if (rc) {
+		EFX_ERR(efx, "could not get GMII settings\n");
+		return rc;
+	}
+	old_mode = efx->loopback_mode;
+
+	/* Disable autonegotiation for the purposes of loopback */
+	memcpy(&ecmd_loopback, &ecmd, sizeof(ecmd_loopback));
+	if (ecmd_loopback.autoneg == AUTONEG_ENABLE) {
+		ecmd_loopback.autoneg = AUTONEG_DISABLE;
+		ecmd_loopback.duplex = DUPLEX_FULL;
+		ecmd_loopback.speed = SPEED_10000;
+	}
+
+	rc = efx_ethtool_set_settings(efx->net_dev, &ecmd_loopback);
+	if (rc) {
+		EFX_ERR(efx, "could not disable autonegotiation\n");
+		goto out;
+	}
+	tests->loopback_speed = ecmd_loopback.speed;
+	tests->loopback_full_duplex = ecmd_loopback.duplex;
+
+	/* Test all supported loopback modes */
+	for (mode = LOOPBACK_NONE; mode < LOOPBACK_TEST_MAX; mode++) {
+		if (!(loopback_modes & (1 << mode)))
+			continue;
+
+		/* Move the port into the specified loopback mode. */
+		state->flush = 1;
+		efx->loopback_mode = mode;
+		efx_reconfigure_port(efx);
+
+		/* Wait for the PHY to signal the link is up */
+		count = 0;
+		do {
+			struct efx_channel *channel = &efx->channel[0];
+
+			falcon_check_xmac(efx);
+			schedule_timeout_uninterruptible(HZ / 10);
+			if (channel->work_pending)
+				efx_process_channel_now(channel);
+			/* Wait for PHY events to be processed */
+			flush_workqueue(efx->workqueue);
+			rmb();
+
+			/* efx->link_up can be 1 even if the XAUI link is down,
+			 * (bug5762). Usually, it's not worth bothering with the
+			 * difference, but for selftests, we need that extra
+			 * guarantee that the link is really, really, up.
+			 */
+			link_up = efx->link_up;
+			if (!falcon_xaui_link_ok(efx))
+				link_up = 0;
+
+		} while ((++count < 20) && !link_up);
+
+		/* The link should now be up. If it isn't, there is no point
+		 * in attempting a loopback test */
+		if (!link_up) {
+			EFX_ERR(efx, "loopback %s never came up\n",
+				LOOPBACK_MODE(efx));
+			rc = -EIO;
+			goto out;
+		}
+
+		EFX_LOG(efx, "link came up in %s loopback in %d iterations\n",
+			LOOPBACK_MODE(efx), count);
+
+		/* Test every TX queue */
+		efx_for_each_tx_queue(tx_queue, efx) {
+			rc |= efx_test_loopback(tx_queue,
+						&tests->loopback[mode]);
+			if (rc)
+				goto out;
+		}
+	}
+
+ out:
+	/* Take out of loopback and restore PHY settings */
+	state->flush = 1;
+	efx->loopback_mode = old_mode;
+	efx_ethtool_set_settings(efx->net_dev, &ecmd);
+
+	return rc;
+}
+
+/**************************************************************************
+ *
+ * Entry points
+ *
+ *************************************************************************/
+
+/* Online (i.e. non-disruptive) testing
+ * This checks interrupt generation, event delivery and PHY presence. */
+int efx_online_test(struct efx_nic *efx, struct efx_self_tests *tests)
+{
+	struct efx_channel *channel;
+	int rc = 0;
+
+	EFX_LOG(efx, "performing online self-tests\n");
+
+	rc |= efx_test_interrupts(efx, tests);
+	efx_for_each_channel(channel, efx) {
+		if (channel->has_interrupt)
+			rc |= efx_test_eventq_irq(channel, tests);
+		else
+			rc |= efx_test_eventq(channel, tests);
+	}
+	rc |= efx_test_phy(efx, tests);
+
+	if (rc)
+		EFX_ERR(efx, "failed online self-tests\n");
+
+	return rc;
+}
+
+/* Offline (i.e. disruptive) testing
+ * This checks MAC and PHY loopback on the specified port. */
+int efx_offline_test(struct efx_nic *efx,
+		     struct efx_self_tests *tests, unsigned int loopback_modes)
+{
+	struct efx_selftest_state *state;
+	int rc = 0;
+
+	EFX_LOG(efx, "performing offline self-tests\n");
+
+	/* Create a selftest_state structure to hold state for the test */
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (state == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Set the port loopback_selftest member. From this point on
+	 * all received packets will be dropped. Mark the state as
+	 * "flushing" so all inflight packets are dropped */
+	BUG_ON(efx->loopback_selftest);
+	state->flush = 1;
+	efx->loopback_selftest = (void *)state;
+
+	rc = efx_test_loopbacks(efx, tests, loopback_modes);
+
+	efx->loopback_selftest = NULL;
+	wmb();
+	kfree(state);
+
+ out:
+	if (rc)
+		EFX_ERR(efx, "failed offline self-tests\n");
+
+	return rc;
+}
+
diff --git a/drivers/net/sfc/selftest.h b/drivers/net/sfc/selftest.h
new file mode 100644
index 0000000..f6999c2
--- /dev/null
+++ b/drivers/net/sfc/selftest.h
@@ -0,0 +1,50 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_SELFTEST_H
+#define EFX_SELFTEST_H
+
+#include "net_driver.h"
+
+/*
+ * Self tests
+ */
+
+struct efx_loopback_self_tests {
+	int tx_sent[EFX_MAX_TX_QUEUES];
+	int tx_done[EFX_MAX_TX_QUEUES];
+	int rx_good;
+	int rx_bad;
+};
+
+/* Efx self test results
+ * For fields which are not counters, 1 indicates success and -1
+ * indicates failure.
+ */
+struct efx_self_tests {
+	int interrupt;
+	int eventq_dma[EFX_MAX_CHANNELS];
+	int eventq_int[EFX_MAX_CHANNELS];
+	int eventq_poll[EFX_MAX_CHANNELS];
+	int phy_ok;
+	int loopback_speed;
+	int loopback_full_duplex;
+	struct efx_loopback_self_tests loopback[LOOPBACK_TEST_MAX];
+};
+
+extern void efx_loopback_rx_packet(struct efx_nic *efx,
+				   const char *buf_ptr, int pkt_len);
+extern int efx_online_test(struct efx_nic *efx,
+			   struct efx_self_tests *tests);
+extern int efx_offline_test(struct efx_nic *efx,
+			    struct efx_self_tests *tests,
+			    unsigned int loopback_modes);
+
+#endif /* EFX_SELFTEST_H */
diff --git a/drivers/net/sfc/tenxpress.c b/drivers/net/sfc/tenxpress.c
index d8df031..b1cd6de 100644
--- a/drivers/net/sfc/tenxpress.c
+++ b/drivers/net/sfc/tenxpress.c
@@ -24,6 +24,11 @@
 				 MDIO_MMDREG_DEVS0_PCS    | \
 				 MDIO_MMDREG_DEVS0_PHYXS)
 
+#define TENXPRESS_LOOPBACKS ((1 << LOOPBACK_PHYXS) |	\
+			     (1 << LOOPBACK_PCS) |	\
+			     (1 << LOOPBACK_PMAPMD) |	\
+			     (1 << LOOPBACK_NETWORK))
+
 /* We complain if we fail to see the link partner as 10G capable this many
  * times in a row (must be > 1 as sampling the autoneg. registers is racy)
  */
@@ -72,6 +77,10 @@
 #define PMA_PMD_BIST_RXD_LBN	(1)
 #define PMA_PMD_BIST_AFE_LBN	(0)
 
+/* Special Software reset register */
+#define PMA_PMD_EXT_CTRL_REG 49152
+#define PMA_PMD_EXT_SSR_LBN 15
+
 #define BIST_MAX_DELAY	(1000)
 #define BIST_POLL_DELAY	(10)
 
@@ -86,6 +95,11 @@
 #define	PCS_TEST_SELECT_REG 0xd807	/* PRM 10.5.8 */
 #define	CLK312_EN_LBN 3
 
+/* PHYXS registers */
+#define PHYXS_TEST1         (49162)
+#define LOOPBACK_NEAR_LBN   (8)
+#define LOOPBACK_NEAR_WIDTH (1)
+
 /* Boot status register */
 #define PCS_BOOT_STATUS_REG	(0xd000)
 #define PCS_BOOT_FATAL_ERR_LBN	(0)
@@ -106,7 +120,9 @@ MODULE_PARM_DESC(crc_error_reset_threshold,
 
 struct tenxpress_phy_data {
 	enum tenxpress_state state;
+	enum efx_loopback_mode loopback_mode;
 	atomic_t bad_crc_count;
+	int tx_disabled;
 	int bad_lp_tries;
 };
 
@@ -227,6 +243,35 @@ static int tenxpress_phy_init(struct efx_nic *efx)
 	return rc;
 }
 
+static int tenxpress_special_reset(struct efx_nic *efx)
+{
+	int rc, reg;
+
+	EFX_TRACE(efx, "%s\n", __func__);
+
+	/* Initiate reset */
+	reg = mdio_clause45_read(efx, efx->mii.phy_id,
+				 MDIO_MMD_PMAPMD, PMA_PMD_EXT_CTRL_REG);
+	reg |= (1 << PMA_PMD_EXT_SSR_LBN);
+	mdio_clause45_write(efx, efx->mii.phy_id, MDIO_MMD_PMAPMD,
+			    PMA_PMD_EXT_CTRL_REG, reg);
+
+	msleep(200);
+
+	/* Wait for the blocks to come out of reset */
+	rc = mdio_clause45_wait_reset_mmds(efx,
+					   TENXPRESS_REQUIRED_DEVS);
+	if (rc < 0)
+		return rc;
+
+	/* Try and reconfigure the device */
+	rc = tenxpress_init(efx);
+	if (rc < 0)
+		return rc;
+
+	return 0;
+}
+
 static void tenxpress_set_bad_lp(struct efx_nic *efx, int bad_lp)
 {
 	struct tenxpress_phy_data *pd = efx->phy_data;
@@ -301,11 +346,46 @@ static int tenxpress_link_ok(struct efx_nic *efx, int check_lp)
 	return ok;
 }
 
+static void tenxpress_phyxs_loopback(struct efx_nic *efx)
+{
+	int phy_id = efx->mii.phy_id;
+	int ctrl1, ctrl2;
+
+	ctrl1 = ctrl2 = mdio_clause45_read(efx, phy_id, MDIO_MMD_PHYXS,
+					   PHYXS_TEST1);
+	if (efx->loopback_mode == LOOPBACK_PHYXS)
+		ctrl2 |= (1 << LOOPBACK_NEAR_LBN);
+	else
+		ctrl2 &= ~(1 << LOOPBACK_NEAR_LBN);
+	if (ctrl1 != ctrl2)
+		mdio_clause45_write(efx, phy_id, MDIO_MMD_PHYXS,
+				    PHYXS_TEST1, ctrl2);
+}
+
 static void tenxpress_phy_reconfigure(struct efx_nic *efx)
 {
+	struct tenxpress_phy_data *phy_data = efx->phy_data;
+	int loop_change = LOOPBACK_OUT_OF(phy_data, efx,
+					  TENXPRESS_LOOPBACKS);
+
 	if (!tenxpress_state_is(efx, TENXPRESS_STATUS_NORMAL))
 		return;
 
+	/* When coming out of transmit disable, coming out of low power
+	 * mode, or moving out of any PHY internal loopback mode,
+	 * perform a special software reset */
+	if ((phy_data->tx_disabled && !efx->tx_disabled) ||
+	    loop_change) {
+		(void) tenxpress_special_reset(efx);
+		falcon_reset_xaui(efx);
+	}
+
+	mdio_clause45_transmit_disable(efx);
+	mdio_clause45_phy_reconfigure(efx);
+	tenxpress_phyxs_loopback(efx);
+
+	phy_data->tx_disabled = efx->tx_disabled;
+	phy_data->loopback_mode = efx->loopback_mode;
 	efx->link_up = tenxpress_link_ok(efx, 0);
 	efx->link_options = GM_LPA_10000FULL;
 }
@@ -433,4 +513,5 @@ struct efx_phy_operations falcon_tenxpress_phy_ops = {
 	.clear_interrupt  = tenxpress_phy_clear_interrupt,
 	.reset_xaui       = tenxpress_reset_xaui,
 	.mmds             = TENXPRESS_REQUIRED_DEVS,
+	.loopbacks        = TENXPRESS_LOOPBACKS,
 };
diff --git a/drivers/net/sfc/xfp_phy.c b/drivers/net/sfc/xfp_phy.c
index 66dd5bf..3b9f9dd 100644
--- a/drivers/net/sfc/xfp_phy.c
+++ b/drivers/net/sfc/xfp_phy.c
@@ -24,6 +24,10 @@
 			   MDIO_MMDREG_DEVS0_PMAPMD |	\
 			   MDIO_MMDREG_DEVS0_PHYXS)
 
+#define XFP_LOOPBACKS ((1 << LOOPBACK_PCS) |		\
+		       (1 << LOOPBACK_PMAPMD) |		\
+		       (1 << LOOPBACK_NETWORK))
+
 /****************************************************************************/
 /* Quake-specific MDIO registers */
 #define MDIO_QUAKE_LED0_REG	(0xD006)
@@ -35,6 +39,10 @@ void xfp_set_led(struct efx_nic *p, int led, int mode)
 			    mode);
 }
 
+struct xfp_phy_data {
+	int tx_disabled;
+};
+
 #define XFP_MAX_RESET_TIME 500
 #define XFP_RESET_WAIT 10
 
@@ -72,18 +80,31 @@ static int xfp_reset_phy(struct efx_nic *efx)
 
 static int xfp_phy_init(struct efx_nic *efx)
 {
+	struct xfp_phy_data *phy_data;
 	u32 devid = mdio_clause45_read_id(efx, MDIO_MMD_PHYXS);
 	int rc;
 
+	phy_data = kzalloc(sizeof(struct xfp_phy_data), GFP_KERNEL);
+	efx->phy_data = (void *) phy_data;
+
 	EFX_INFO(efx, "XFP: PHY ID reg %x (OUI %x model %x revision"
 		 " %x)\n", devid, MDIO_ID_OUI(devid), MDIO_ID_MODEL(devid),
 		 MDIO_ID_REV(devid));
 
+	phy_data->tx_disabled = efx->tx_disabled;
+
 	rc = xfp_reset_phy(efx);
 
 	EFX_INFO(efx, "XFP: PHY init %s.\n",
 		 rc ? "failed" : "successful");
+	if (rc < 0)
+		goto fail;
 
+	return 0;
+
+ fail:
+	kfree(efx->phy_data);
+	efx->phy_data = NULL;
 	return rc;
 }
 
@@ -110,6 +131,16 @@ static int xfp_phy_check_hw(struct efx_nic *efx)
 
 static void xfp_phy_reconfigure(struct efx_nic *efx)
 {
+	struct xfp_phy_data *phy_data = efx->phy_data;
+
+	/* Reset the PHY when moving from tx off to tx on */
+	if (phy_data->tx_disabled && !efx->tx_disabled)
+		xfp_reset_phy(efx);
+
+	mdio_clause45_transmit_disable(efx);
+	mdio_clause45_phy_reconfigure(efx);
+
+	phy_data->tx_disabled = efx->tx_disabled;
 	efx->link_up = xfp_link_ok(efx);
 	efx->link_options = GM_LPA_10000FULL;
 }
@@ -119,6 +150,10 @@ static void xfp_phy_fini(struct efx_nic *efx)
 {
 	/* Clobber the LED if it was blinking */
 	efx->board_info.blink(efx, 0);
+
+	/* Free the context block */
+	kfree(efx->phy_data);
+	efx->phy_data = NULL;
 }
 
 struct efx_phy_operations falcon_xfp_phy_ops = {
@@ -129,4 +164,5 @@ struct efx_phy_operations falcon_xfp_phy_ops = {
 	.clear_interrupt = xfp_phy_clear_interrupt,
 	.reset_xaui      = efx_port_dummy_op_void,
 	.mmds            = XFP_REQUIRED_DEVS,
+	.loopbacks       = XFP_LOOPBACKS,
 };
-- 
cgit v0.10.2


From a300344ab9b77130310fc225fdc7677e129b1163 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Tue, 6 May 2008 14:34:35 -0700
Subject: sky2: fix simple define thinko

noticed while browsing code, apparent thinko.  compile tested only.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
CC: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h
index 7bb3ba9..c0a5eea 100644
--- a/drivers/net/sky2.h
+++ b/drivers/net/sky2.h
@@ -1966,13 +1966,13 @@ struct sky2_status_le {
 struct tx_ring_info {
 	struct sk_buff	*skb;
 	DECLARE_PCI_UNMAP_ADDR(mapaddr);
-	DECLARE_PCI_UNMAP_ADDR(maplen);
+	DECLARE_PCI_UNMAP_LEN(maplen);
 };
 
 struct rx_ring_info {
 	struct sk_buff	*skb;
 	dma_addr_t	data_addr;
-	DECLARE_PCI_UNMAP_ADDR(data_size);
+	DECLARE_PCI_UNMAP_LEN(data_size);
 	dma_addr_t	frag_addr[ETH_JUMBO_MTU >> PAGE_SHIFT];
 };
 
-- 
cgit v0.10.2


From e21fd4f07dd0c2630c3db41f419e4c658d0dee2c Mon Sep 17 00:00:00 2001
From: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Date: Thu, 8 May 2008 11:33:03 +0100
Subject: DM9000: Add __devinit and __devexit attributes to probe and remove

There were missing __dev* annotations for the dm9000_probe()
and dm9000_drv_remove() functions.

Signed-off-by: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c
index e6fe261..273e654a 100644
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -503,7 +503,7 @@ dm9000_release_board(struct platform_device *pdev, struct board_info *db)
 /*
  * Search DM9000 board, allocate space and register it
  */
-static int
+static int __devinit
 dm9000_probe(struct platform_device *pdev)
 {
 	struct dm9000_plat_data *pdata = pdev->dev.platform_data;
@@ -1372,7 +1372,7 @@ dm9000_drv_resume(struct platform_device *dev)
 	return 0;
 }
 
-static int
+static int __devexit
 dm9000_drv_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
@@ -1393,7 +1393,7 @@ static struct platform_driver dm9000_driver = {
 		.owner	 = THIS_MODULE,
 	},
 	.probe   = dm9000_probe,
-	.remove  = dm9000_drv_remove,
+	.remove  = __devexit_p(dm9000_drv_remove),
 	.suspend = dm9000_drv_suspend,
 	.resume  = dm9000_drv_resume,
 };
-- 
cgit v0.10.2


From 37d5dca6af6b62bbb2c63f46a06cb07d0cf4522b Mon Sep 17 00:00:00 2001
From: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Date: Thu, 8 May 2008 11:35:13 +0100
Subject: DM9000: Update and fix driver debugging messages

There was a missing newline in a dev_dbg() message.  Values read
from/written into PHY registers might be of interest too, so I added
new dbg messages there.

Signed-off-by: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c
index 273e654a..7c49f33 100644
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -525,7 +525,7 @@ dm9000_probe(struct platform_device *pdev)
 
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 
-	dev_dbg(&pdev->dev, "dm9000_probe()");
+	dev_dbg(&pdev->dev, "dm9000_probe()\n");
 
 	/* setup board info structure */
 	db = (struct board_info *) ndev->priv;
@@ -1288,6 +1288,8 @@ dm9000_phy_read(struct net_device *dev, int phy_reg_unused, int reg)
 	spin_unlock_irqrestore(&db->lock,flags);
 
 	mutex_unlock(&db->addr_lock);
+
+	dm9000_dbg(db, 5, "phy_read[%02x] -> %04x\n", reg, ret);
 	return ret;
 }
 
@@ -1301,6 +1303,7 @@ dm9000_phy_write(struct net_device *dev, int phyaddr_unused, int reg, int value)
 	unsigned long flags;
 	unsigned long reg_save;
 
+	dm9000_dbg(db, 5, "phy_write[%02x] = %04x\n", reg, value);
 	mutex_lock(&db->addr_lock);
 
 	spin_lock_irqsave(&db->lock,flags);
-- 
cgit v0.10.2


From 8f5bf5f25cdf9270f33ed347c582a3a451d3c38a Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Thu, 8 May 2008 11:36:42 +0100
Subject: DM9000: Use delayed work to update MII PHY state

Periodically check the MII PHY status to ensure that the
network layer's link status is updated and the user informed
of any changes.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c
index 7c49f33..d45bcd2 100644
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -117,6 +117,9 @@ typedef struct board_info {
 
 	struct mutex	 addr_lock;	/* phy and eeprom access lock */
 
+	struct delayed_work phy_poll;
+	struct net_device  *ndev;
+
 	spinlock_t lock;
 
 	struct mii_if_info mii;
@@ -297,6 +300,10 @@ static void dm9000_set_io(struct board_info *db, int byte_width)
 	}
 }
 
+static void dm9000_schedule_poll(board_info_t *db)
+{
+	schedule_delayed_work(&db->phy_poll, HZ * 2);
+}
 
 /* Our watchdog timed out. Called by the networking layer */
 static void dm9000_timeout(struct net_device *dev)
@@ -465,6 +472,17 @@ static const struct ethtool_ops dm9000_ethtool_ops = {
  	.set_eeprom		= dm9000_set_eeprom,
 };
 
+static void
+dm9000_poll_work(struct work_struct *w)
+{
+	struct delayed_work *dw = container_of(w, struct delayed_work, work);
+	board_info_t *db = container_of(dw, board_info_t, phy_poll);
+
+	mii_check_media(&db->mii, netif_msg_link(db), 0);
+	
+	if (netif_running(db->ndev))
+		dm9000_schedule_poll(db);
+}
 
 /* dm9000_release_board
  *
@@ -532,10 +550,14 @@ dm9000_probe(struct platform_device *pdev)
 	memset(db, 0, sizeof (*db));
 
 	db->dev = &pdev->dev;
+	db->ndev = ndev;
 
 	spin_lock_init(&db->lock);
 	mutex_init(&db->addr_lock);
 
+	INIT_DELAYED_WORK(&db->phy_poll, dm9000_poll_work);
+
+
 	if (pdev->num_resources < 2) {
 		ret = -ENODEV;
 		goto out;
@@ -761,6 +783,8 @@ dm9000_open(struct net_device *dev)
 
 	mii_check_media(&db->mii, netif_msg_link(db), 1);
 	netif_start_queue(dev);
+	
+	dm9000_schedule_poll(db);
 
 	return 0;
 }
@@ -879,6 +903,8 @@ dm9000_stop(struct net_device *ndev)
 	if (netif_msg_ifdown(db))
 		dev_dbg(db->dev, "shutting down %s\n", ndev->name);
 
+	cancel_delayed_work(&db->phy_poll);
+
 	netif_stop_queue(ndev);
 	netif_carrier_off(ndev);
 
-- 
cgit v0.10.2


From 993245908ec35c071315479e20602577b7b5dde6 Mon Sep 17 00:00:00 2001
From: Auke Kok <auke-jan.h.kok@intel.com>
Date: Wed, 7 May 2008 13:42:33 -0700
Subject: New maintainer for Intel ethernet adapters

I'm handing over maintainership to Jeff Kirsher and moving on
to other Linux/Open Source work within Intel. Good luck to Jeff ;)

Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index c3a533d..0cc47b9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2104,12 +2104,10 @@ L:	netdev@vger.kernel.org
 S:	Maintained
 
 INTEL ETHERNET DRIVERS (e100/e1000/e1000e/igb/ixgb/ixgbe)
-P:	Auke Kok
-M:	auke-jan.h.kok@intel.com
-P:	Jesse Brandeburg
-M:	jesse.brandeburg@intel.com
 P:	Jeff Kirsher
 M:	jeffrey.t.kirsher@intel.com
+P:	Jesse Brandeburg
+M:	jesse.brandeburg@intel.com
 P:	Bruce Allan
 M:	bruce.w.allan@intel.com
 P:	John Ronciak
-- 
cgit v0.10.2


From a1c1f281b84a751fdb5ff919da3b09df7297619f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Tue, 13 May 2008 02:53:26 -0700
Subject: tcp FRTO: Fix fallback to conventional recovery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It seems that commit 009a2e3e4ec ("[TCP] FRTO: Improve
interoperability with other undo_marker users") ran into
another land-mine which caused fallback to conventional
recovery to break:

1. Cumulative ACK arrives after FRTO retransmission
2. tcp_try_to_open sees zero retrans_out and clears retrans_stamp,
   which should be kept as it would be in CA_Loss state
3. The undo_marker change allowed tcp_packet_delayed to return
   true because of the cleared retrans_stamp once FRTO is
   terminated, causing LossUndo to occur, which means all loss
   markings FRTO made are reverted.

This means that the conventional recovery basically recovered
one loss per RTT, which is not that efficient. It was quite
unobvious that the undo_marker change broke something like
this; I had quite a long session tracking it down because of
the non-intuitiveness of the bug (luckily I had a trivial
reproducer at hand and I was also able to learn to use kprobes
in the process as well :-)).

This, together with the NewReno+FRTO fix and the FRTO in-order
workaround, fixes Damon's problems; this and the first
mentioned are enough to fix Bugzilla #10063.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Reported-by: Damon L. Chesser <damon@damtek.com>
Tested-by: Damon L. Chesser <damon@damtek.com>
Tested-by: Sebastian Hyrwall <zibbe@cisko.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 26c9369..d6edb98 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2482,7 +2482,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
 
 	tcp_verify_left_out(tp);
 
-	if (tp->retrans_out == 0)
+	if (!tp->frto_counter && tp->retrans_out == 0)
 		tp->retrans_stamp = 0;
 
 	if (flag & FLAG_ECE)
-- 
cgit v0.10.2


From 79d44516b4b178ffb6e2159c75584cfcfc097914 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Tue, 13 May 2008 02:54:19 -0700
Subject: tcp FRTO: work-around inorder receivers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a receiver successfully consumes segments only in order, FRTO
fallback to conventional recovery produces an RTO loop because
FRTO's forward transmissions will always get dropped and need to
be resent, yet by default they're not marked as lost (and lost
segments are the only ones we will retransmit in CA_Loss).

The price to pay for this is occasionally retransmitting the
forward transmission(s) unnecessarily. SACK blocks help a bit to
avoid this, so it's mainly a concern for the NewReno case,
though SACK is not fully immune either.

This change has a side-effect of fixing a SACKFRTO problem where
it didn't have the snd_nxt of the RTO time available anymore when
fallback became necessary (this problem would have only occurred
when RTO would occur for two or more segments and ECE arrives
in step 3; no need to figure out how to fix that unless the
TODO item of selective behavior is considered in future).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Reported-by: Damon L. Chesser <damon@damtek.com>
Tested-by: Damon L. Chesser <damon@damtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d6edb98..b54d9d3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1842,9 +1842,16 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 		}
 
-		/* Don't lost mark skbs that were fwd transmitted after RTO */
-		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) &&
-		    !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) {
+		/* Marking forward transmissions that were made after RTO lost
+		 * can cause unnecessary retransmissions in some scenarios,
+		 * SACK blocks will mitigate that in some but not in all cases.
+		 * We used to not mark them but it was causing break-ups with
+		 * receivers that do only in-order receival.
+		 *
+		 * TODO: we could detect presence of such receiver and select
+		 * different behavior per flow.
+		 */
+		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 			tp->lost_out += tcp_skb_pcount(skb);
 		}
@@ -1860,7 +1867,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 	tp->reordering = min_t(unsigned int, tp->reordering,
 			       sysctl_tcp_reordering);
 	tcp_set_ca_state(sk, TCP_CA_Loss);
-	tp->high_seq = tp->frto_highmark;
+	tp->high_seq = tp->snd_nxt;
 	TCP_ECN_queue_cwr(tp);
 
 	tcp_clear_retrans_hints_partial(tp);
-- 
cgit v0.10.2


From 5e0f8923f350ff522f8f6aecf198df045af3615f Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 13 May 2008 23:23:55 -0700
Subject: cipso: Relax too much careful cipso hash function.

The cipso_v4_cache is allocated to contain CIPSO_V4_CACHE_BUCKETS
buckets. The CIPSO_V4_CACHE_BUCKETS = 1 << CIPSO_V4_CACHE_BUCKETBITS,
where CIPSO_V4_CACHE_BUCKETBITS = 7.

The bucket-selection function for this hash is calculated like this:

  bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
                                     ^^^

i.e. picking only 4 buckets of possible 128 :)

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 05afb57..2c0e457 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -338,7 +338,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
 		return -ENOENT;
 
 	hash = cipso_v4_map_cache_hash(key, key_len);
-	bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
+	bkt = hash & (CIPSO_V4_CACHE_BUCKETS - 1);
 	spin_lock_bh(&cipso_v4_cache[bkt].lock);
 	list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) {
 		if (entry->hash == hash &&
@@ -417,7 +417,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb,
 	atomic_inc(&secattr->cache->refcount);
 	entry->lsm_data = secattr->cache;
 
-	bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
+	bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
 	spin_lock_bh(&cipso_v4_cache[bkt].lock);
 	if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
 		list_add(&entry->list, &cipso_v4_cache[bkt].list);
-- 
cgit v0.10.2


From c1cc678adaa78ae2aab6a6d699241ad516d84476 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 13 May 2008 23:25:00 -0700
Subject: sctp: Fix use of uninitialized pointer

Introduced by c4492586 (sctp: Add address type check while process
paramaters of ASCONF chunk):

net/sctp/sm_make_chunk.c: In function 'sctp_process_asconf':
net/sctp/sm_make_chunk.c:2828: warning: 'addr_param' may be used uninitialized in this function
net/sctp/sm_make_chunk.c:2828: note: 'addr_param' was declared here

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 6eeee53..bbc7107 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2827,6 +2827,9 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
 	union sctp_addr	addr;
 	union sctp_addr_param *addr_param;
 
+	addr_param = (union sctp_addr_param *)
+			((void *)asconf_param + sizeof(sctp_addip_param_t));
+
 	switch (addr_param->v4.param_hdr.type) {
 	case SCTP_PARAM_IPV6_ADDRESS:
 		if (!asoc->peer.ipv6_address)
@@ -2840,9 +2843,6 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
 		return SCTP_ERROR_INV_PARAM;
 	}
 
-	addr_param = (union sctp_addr_param *)
-			((void *)asconf_param + sizeof(sctp_addip_param_t));
-
 	af = sctp_get_af_specific(param_type2af(addr_param->v4.param_hdr.type));
 	if (unlikely(!af))
 		return SCTP_ERROR_INV_PARAM;
-- 
cgit v0.10.2


From 332223831e86b2e17b48b4afafad07d8e3b73861 Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Tue, 13 May 2008 23:25:57 -0700
Subject: irda: Fix a misalign access issue. (v2)

Replace u16ho with put/get_unaligned functions

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/irda/discovery.h b/include/net/irda/discovery.h
index e4efad1..0ce9339 100644
--- a/include/net/irda/discovery.h
+++ b/include/net/irda/discovery.h
@@ -57,9 +57,6 @@ typedef union {
 	__u8  byte[2];
 } __u16_host_order;
 
-/* Same purpose, different application */
-#define u16ho(array) (* ((__u16 *) array))
-
 /* Types of discovery */
 typedef enum {
 	DISCOVERY_LOG,		/* What's in our discovery log */
diff --git a/net/irda/discovery.c b/net/irda/discovery.c
index bfacef8..a6f99b5 100644
--- a/net/irda/discovery.c
+++ b/net/irda/discovery.c
@@ -40,6 +40,8 @@
 
 #include <net/irda/discovery.h>
 
+#include <asm/unaligned.h>
+
 /*
  * Function irlmp_add_discovery (cachelog, discovery)
  *
@@ -87,7 +89,7 @@ void irlmp_add_discovery(hashbin_t *cachelog, discovery_t *new)
 			 */
 			hashbin_remove_this(cachelog, (irda_queue_t *) node);
 			/* Check if hints bits are unchanged */
-			if(u16ho(node->data.hints) == u16ho(new->data.hints))
+			if (get_unaligned((__u16 *)node->data.hints) == get_unaligned((__u16 *)new->data.hints))
 				/* Set time of first discovery for this node */
 				new->firststamp = node->firststamp;
 			kfree(node);
@@ -281,9 +283,9 @@ struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn,
 		/* Mask out the ones we don't want :
 		 * We want to match the discovery mask, and to get only
 		 * the most recent one (unless we want old ones) */
-		if ((u16ho(discovery->data.hints) & mask) &&
+		if ((get_unaligned((__u16 *)discovery->data.hints) & mask) &&
 		    ((old_entries) ||
-		     ((jiffies - discovery->firststamp) < j_timeout)) ) {
+		     ((jiffies - discovery->firststamp) < j_timeout))) {
 			/* Create buffer as needed.
 			 * As this function get called a lot and most time
 			 * we don't have anything to put in the log (we are
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 1f81f8e..7bf5b91 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -1062,7 +1062,8 @@ void irlmp_discovery_expiry(discinfo_t *expiries, int number)
 		for(i = 0; i < number; i++) {
 			/* Check if we should notify client */
 			if ((client->expir_callback) &&
-			    (client->hint_mask.word & u16ho(expiries[i].hints)
+			    (client->hint_mask.word &
+			     get_unaligned((__u16 *)expiries[i].hints)
 			     & 0x7f7f) )
 				client->expir_callback(&(expiries[i]),
 						       EXPIRY_TIMEOUT,
@@ -1086,7 +1087,7 @@ discovery_t *irlmp_get_discovery_response(void)
 
 	IRDA_ASSERT(irlmp != NULL, return NULL;);
 
-	u16ho(irlmp->discovery_rsp.data.hints) = irlmp->hints.word;
+	put_unaligned(irlmp->hints.word, (__u16 *)irlmp->discovery_rsp.data.hints);
 
 	/*
 	 *  Set character set for device name (we use ASCII), and
diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c
index 75497e5..a3ec002 100644
--- a/net/irda/irnet/irnet_irda.c
+++ b/net/irda/irnet/irnet_irda.c
@@ -1673,7 +1673,7 @@ irnet_discovery_indication(discinfo_t *		discovery,
   /* Notify the control channel */
   irnet_post_event(NULL, IRNET_DISCOVER,
 		   discovery->saddr, discovery->daddr, discovery->info,
-		   u16ho(discovery->hints));
+		   get_unaligned((__u16 *)discovery->hints));
 
   DEXIT(IRDA_OCB_TRACE, "\n");
 }
@@ -1704,7 +1704,7 @@ irnet_expiry_indication(discinfo_t *	expiry,
   /* Notify the control channel */
   irnet_post_event(NULL, IRNET_EXPIRE,
 		   expiry->saddr, expiry->daddr, expiry->info,
-		   u16ho(expiry->hints));
+		   get_unaligned((__u16 *)expiry->hints));
 
   DEXIT(IRDA_OCB_TRACE, "\n");
 }
-- 
cgit v0.10.2


From 1eedf69993d4016428fd99ffd619e73b374be3c1 Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@inl.fr>
Date: Tue, 13 May 2008 23:27:11 -0700
Subject: netfilter: ctnetlink: dump conntrack ID in event messages

Conntrack ID is not put (anymore ?) in event messages. This causes
current ulogd2 code to fail because it uses the ID to build a hash in
userspace. This hash is used to be able to output the starting time of
a connection.

Conntrack ID can be used in userspace applications to maintain an easy
match between the kernel connection list and the userspace one. It may
be worth adding it if there is no performance-related issue.

[ Patrick: it was never included in events, but really should be ]

Signed-off-by: Eric Leblond <eric@inl.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 16774ec..0edefcf 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -472,6 +472,9 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 		goto nla_put_failure;
 	nla_nest_end(skb, nest_parms);
 
+	if (ctnetlink_dump_id(skb, ct) < 0)
+		goto nla_put_failure;
+
 	if (events & IPCT_DESTROY) {
 		if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
 		    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
-- 
cgit v0.10.2


From 01b7a314291b2ef56ad718ee1374a1bac4768b29 Mon Sep 17 00:00:00 2001
From: Phil Oester <kernel@linuxace.com>
Date: Tue, 13 May 2008 23:27:48 -0700
Subject: netfilter: xt_iprange: module aliases for xt_iprange

Using iptables 1.3.8 with kernel 2.6.25, rules which include '-m
iprange' don't automatically pull in the xt_iprange module.  The patch
below adds module aliases to fix that.  Patch against latest -git, but
seems like a good candidate for -stable also.

Signed-off-by: Phil Oester <kernel@linuxace.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 500528d..c63e933 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -179,3 +179,5 @@ module_exit(iprange_mt_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>, Jan Engelhardt <jengelh@computergmbh.de>");
 MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching");
+MODULE_ALIAS("ipt_iprange");
+MODULE_ALIAS("ip6t_iprange");
-- 
cgit v0.10.2


From 4b95ede6f6116ae1c0ed9605ec97d856c4814569 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 13 May 2008 23:51:18 -0700
Subject: ppp: Do not free not yet unregistered net device.

An error path in ppp_create_interface() lacks an unregister_netdev()
call, so free_netdev() may BUG when it checks dev->reg_state.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index d3207c0..1f4ca2b 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -2458,6 +2458,7 @@ ppp_create_interface(int unit, int *retp)
 
 out3:
 	atomic_dec(&ppp_unit_count);
+	unregister_netdev(dev);
 out2:
 	mutex_unlock(&all_ppp_mutex);
 	free_netdev(dev);
-- 
cgit v0.10.2


From 9ee6b7f1556e7889eff4666483b1b554d4686cd4 Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Wed, 14 May 2008 03:50:03 -0700
Subject: net: Fix typo in net/core/sock.c.

In sock_queue_rcv_skb()  (net/core/sock.c) it should be:
"Cast sk->rcvbuf ..." instead of: "Cast skb->rcvbuf ..."

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/sock.c b/net/core/sock.c
index fa76f04..88094cb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -270,7 +270,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	int err = 0;
 	int skb_len;
 
-	/* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
+	/* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
 	   number of warnings when compiling with -W --ANK
 	 */
 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-- 
cgit v0.10.2