From d568df84f987a9321c1f5826a6c8678ef2bb2b70 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Wed, 12 Jul 2006 01:47:00 -0400
Subject: ACPI: handle firmware_register init errors

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index b297769..7b77ee1 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
+#include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/sched.h>
 #include <linux/pm.h>
@@ -738,7 +739,10 @@ static int __init acpi_init(void)
 		return -ENODEV;
 	}
 
-	firmware_register(&acpi_subsys);
+	result = firmware_register(&acpi_subsys);
+	if (result < 0)
+		printk(KERN_WARNING "%s: firmware_register error: %d\n",
+			__FUNCTION__, result);
 
 	result = acpi_bus_init();
 
-- 
cgit v0.10.2


From 9b6d97b64eff08b368375efcf9c1d01eba582ea2 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Wed, 12 Jul 2006 02:08:00 -0400
Subject: ACPI: scan: handle kset/kobject errors

Check and handle kset_register() and kobject_register() init errors.

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 5fcb50c..698a154 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -4,6 +4,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/acpi.h>
 
 #include <acpi/acpi_drivers.h>
@@ -113,6 +114,8 @@ static struct kset acpi_namespace_kset = {
 static void acpi_device_register(struct acpi_device *device,
 				 struct acpi_device *parent)
 {
+	int err;
+
 	/*
 	 * Linkage
 	 * -------
@@ -138,7 +141,10 @@ static void acpi_device_register(struct acpi_device *device,
 		device->kobj.parent = &parent->kobj;
 	device->kobj.ktype = &ktype_acpi_ns;
 	device->kobj.kset = &acpi_namespace_kset;
-	kobject_register(&device->kobj);
+	err = kobject_register(&device->kobj);
+	if (err < 0)
+		printk(KERN_WARNING "%s: kobject_register error: %d\n",
+			__FUNCTION__, err);
 	create_sysfs_device_files(device);
 }
 
@@ -1450,7 +1456,9 @@ static int __init acpi_scan_init(void)
 	if (acpi_disabled)
 		return 0;
 
-	kset_register(&acpi_namespace_kset);
+	result = kset_register(&acpi_namespace_kset);
+	if (result < 0)
+		printk(KERN_ERR PREFIX "kset_register error: %d\n", result);
 
 	result = bus_register(&acpi_bus_type);
 	if (result) {
-- 
cgit v0.10.2


From 9805cb76f7bcd3108e012270d9ef2fd8ea3bea55 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Tue, 25 Jul 2006 13:30:57 -0400
Subject: ACPI: restore some dmesg to DEBUG-only, ala 2.6.17

The ACPI_EXCEPTION() patch enabled a bunch of messages to print
even in the non-DEBUG kernel.  Need to change a couple back,
and note that ACPI_EXCEPTION takes no \n, but ACPI_DEBUG_PRINT does.

No context for object [%p]\n
Device `[%s]' is not power manageable\n

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 7b77ee1..279c4ba 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -69,7 +69,8 @@ int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device)
 
 	status = acpi_get_data(handle, acpi_bus_data_handler, (void **)device);
 	if (ACPI_FAILURE(status) || !*device) {
-		ACPI_EXCEPTION((AE_INFO, status, "No context for object [%p]", handle));
+		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No context for object [%p]\n",
+				  handle));
 		return -ENODEV;
 	}
 
@@ -193,7 +194,7 @@ int acpi_bus_set_power(acpi_handle handle, int state)
 	/* Make sure this is a valid target state */
 
 	if (!device->flags.power_manageable) {
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device `[%s]' is not power manageable",
+		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device `[%s]' is not power manageable\n",
 				device->kobj.name));
 		return -ENODEV;
 	}
-- 
cgit v0.10.2


From 16a74744231e57e354253567490ab9e4ccd2d605 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Wed, 5 Apr 2006 08:47:00 -0400
Subject: PCI: quirk to disable e100 interrupt if RESET failed to

Without this quirk, e100 can be pulling on a shared
interrupt line when another device (eg. USB) loads,
causing the interrupt to scream and get disabled.

http://bugzilla.kernel.org/show_bug.cgi?id=5918

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index e3c78c3..5ca1e94 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1511,6 +1511,63 @@ static void __devinit quirk_netmos(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NETMOS, PCI_ANY_ID, quirk_netmos);
 
+static void __devinit quirk_e100_interrupt(struct pci_dev *dev)
+{
+	u16 command;
+	u32 bar;
+	u8 __iomem *csr;
+	u8 cmd_hi;
+
+	switch (dev->device) {
+	/* PCI IDs taken from drivers/net/e100.c */
+	case 0x1029:
+	case 0x1030 ... 0x1034:
+	case 0x1038 ... 0x103E:
+	case 0x1050 ... 0x1057:
+	case 0x1059:
+	case 0x1064 ... 0x106B:
+	case 0x1091 ... 0x1095:
+	case 0x1209:
+	case 0x1229:
+	case 0x2449:
+	case 0x2459:
+	case 0x245D:
+	case 0x27DC:
+		break;
+	default:
+		return;
+	}
+
+	/*
+	 * Some firmware hands off the e100 with interrupts enabled,
+	 * which can cause a flood of interrupts if packets are
+	 * received before the driver attaches to the device.  So
+	 * disable all e100 interrupts here.  The driver will
+	 * re-enable them when it's ready.
+	 */
+	pci_read_config_word(dev, PCI_COMMAND, &command);
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &bar);
+
+	if (!(command & PCI_COMMAND_MEMORY) || !bar)
+		return;
+
+	csr = ioremap(bar, 8);
+	if (!csr) {
+		printk(KERN_WARNING "PCI: Can't map %s e100 registers\n",
+			pci_name(dev));
+		return;
+	}
+
+	cmd_hi = readb(csr + 3);
+	if (cmd_hi == 0) {
+		printk(KERN_WARNING "PCI: Firmware left %s e100 interrupts "
+			"enabled, disabling\n", pci_name(dev));
+		writeb(1, csr + 3);
+	}
+
+	iounmap(csr);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_e100_interrupt);
 
 static void __devinit fixup_rev1_53c810(struct pci_dev* dev)
 {
-- 
cgit v0.10.2


From 5b9c9bf6c92274a6eb74fc8f86586ab592a7a1ec Mon Sep 17 00:00:00 2001
From: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
Date: Wed, 26 Jul 2006 13:59:00 -0400
Subject: ACPI: add Dock Station driver to MAINTAINERS file

Signed-off-by: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 645a9f8..cf64307 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -875,6 +875,12 @@ M:	rdunlap@xenotime.net
 T:	git http://tali.admingilde.org/git/linux-docbook.git
 S:	Maintained
 
+DOCKING STATION DRIVER
+P:	Kristen Carlson Accardi
+M:	kristen.c.accardi@intel.com
+L:	linux-acpi@vger.kernel.org
+S:	Maintained
+
 DOUBLETALK DRIVER
 P:	James R. Van Zandt
 M:	jrv@vanzandt.mv.com
-- 
cgit v0.10.2


From b2b3c121076961333977f485f0d54c22121df920 Mon Sep 17 00:00:00 2001
From: Grant Grundler <grundler@parisc-linux.org>
Date: Mon, 17 Jul 2006 07:22:45 -0600
Subject: [SCSI] sym2: claim only "Storage" class

The following patch fixes a problem for Matt Taggart.
The Compaq system he had (dl380?) has a SmartArray device that exposes
the 53c1510 device in both RAID and "normal" modes. The difference
is that in RAID mode, the smart array driver (IIRC) should claim the
device instead of the sym2 driver. The patch below prevents sym2 from
claiming the device when the RAID "daughter board" is attached.

Signed-off-by: Grant Grundler <grundler@parisc-linux.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index 8c50507..739d3ef 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -2084,7 +2084,7 @@ static struct pci_device_id sym2_id_table[] __devinitdata = {
 	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_NCR_53C860,
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
 	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_53C1510,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_SCSI<<8,  0xffff00, 0UL },
 	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_NCR_53C896,
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
 	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_NCR_53C895,
-- 
cgit v0.10.2


From 8d4fbd3f978ba49498dc7c6430368c4ff86a09fc Mon Sep 17 00:00:00 2001
From: HighPoint Linux Team <linux@highpoint-tech.com>
Date: Mon, 24 Jul 2006 15:48:54 +0800
Subject: [SCSI] hptiop: wrong register used in hptiop_reset_hba()

IOP reset message should be posted to inbound message register
instead of outbound message register.

Signed-off-by: HighPoint Linux Team <linux@highpoint-tech.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c
index ab2f8b2..74d4d22 100644
--- a/drivers/scsi/hptiop.c
+++ b/drivers/scsi/hptiop.c
@@ -577,7 +577,7 @@ static int hptiop_reset_hba(struct hptiop_hba *hba)
 	if (atomic_xchg(&hba->resetting, 1) == 0) {
 		atomic_inc(&hba->reset_count);
 		writel(IOPMU_INBOUND_MSG0_RESET,
-				&hba->iop->outbound_msgaddr0);
+				&hba->iop->inbound_msgaddr0);
 		hptiop_pci_posting_flush(hba->iop);
 	}
 
-- 
cgit v0.10.2


From d82967c70658a408ea6cae5dc989ba8b2c0999e1 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Mon, 24 Jul 2006 15:47:11 -0500
Subject: [SCSI] iscsi bugfixes: send correct error values to userspace

In the xmit patch we are sending a -EXXX value to iscsi_conn_failure
which is causing userspace to get confused.

We should be sending an ISCSI_ERR_* value that userspace understands.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 848fb2a..88dafdf 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -843,7 +843,7 @@ more:
 		       if (rc == -EAGAIN)
 				goto nomore;
 		       else {
-				iscsi_conn_failure(conn, rc);
+				iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
 				return 0;
 		       }
 		}
@@ -859,7 +859,7 @@ more:
 			}
 			tcp_conn->in_progress = IN_PROGRESS_DATA_RECV;
 		} else if (rc) {
-			iscsi_conn_failure(conn, rc);
+			iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
 			return 0;
 		}
 	}
@@ -897,7 +897,7 @@ more:
 		if (rc) {
 			if (rc == -EAGAIN)
 				goto again;
-			iscsi_conn_failure(conn, rc);
+			iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
 			return 0;
 		}
 		tcp_conn->in.copy -= tcp_conn->in.padding;
-- 
cgit v0.10.2


From b6c395ed0387c824ddf125d3b74b576a2575c149 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Mon, 24 Jul 2006 15:47:15 -0500
Subject: [SCSI] iscsi bugfixes: fix r2t handling

The iscsi tcp code can pluck multiple r2ts from the task's r2tqueue
in the xmit code. This can result in the task being queued on the xmit queue
but getting completed at the same time.

This patch fixes the above bug by making the fifo a list so
we always remove the entry on the list del.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 88dafdf..ab324d9 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -185,11 +185,19 @@ iscsi_hdr_extract(struct iscsi_tcp_conn *tcp_conn)
  * must be called with session lock
  */
 static void
-__iscsi_ctask_cleanup(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+iscsi_tcp_cleanup_ctask(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
 	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+	struct iscsi_r2t_info *r2t;
 	struct scsi_cmnd *sc;
 
+	/* flush ctask's r2t queues */
+	while (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*))) {
+		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
+			    sizeof(void*));
+		debug_scsi("iscsi_tcp_cleanup_ctask pending r2t dropped\n");
+	}
+
 	sc = ctask->sc;
 	if (unlikely(!sc))
 		return;
@@ -374,6 +382,7 @@ iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 		spin_unlock(&session->lock);
 		return 0;
 	}
+
 	rc = __kfifo_get(tcp_ctask->r2tpool.queue, (void*)&r2t, sizeof(void*));
 	BUG_ON(!rc);
 
@@ -399,7 +408,7 @@ iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 	tcp_ctask->exp_r2tsn = r2tsn + 1;
 	tcp_ctask->xmstate |= XMSTATE_SOL_HDR;
 	__kfifo_put(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*));
-	__kfifo_put(conn->xmitqueue, (void*)&ctask, sizeof(void*));
+	list_move_tail(&ctask->running, &conn->xmitqueue);
 
 	scsi_queue_work(session->host, &conn->xmitwork);
 	conn->r2t_pdus_cnt++;
@@ -484,7 +493,7 @@ iscsi_tcp_hdr_recv(struct iscsi_conn *conn)
 			goto copy_hdr;
 
 		spin_lock(&session->lock);
-		__iscsi_ctask_cleanup(conn, tcp_conn->in.ctask);
+		iscsi_tcp_cleanup_ctask(conn, tcp_conn->in.ctask);
 		rc = __iscsi_complete_pdu(conn, hdr, NULL, 0);
 		spin_unlock(&session->lock);
 		break;
@@ -745,10 +754,11 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn)
 done:
 	/* check for non-exceptional status */
 	if (tcp_conn->in.hdr->flags & ISCSI_FLAG_DATA_STATUS) {
-		debug_scsi("done [sc %lx res %d itt 0x%x]\n",
-			   (long)sc, sc->result, ctask->itt);
+		debug_scsi("done [sc %lx res %d itt 0x%x flags 0x%x]\n",
+			   (long)sc, sc->result, ctask->itt,
+			   tcp_conn->in.hdr->flags);
 		spin_lock(&conn->session->lock);
-		__iscsi_ctask_cleanup(conn, ctask);
+		iscsi_tcp_cleanup_ctask(conn, ctask);
 		__iscsi_complete_pdu(conn, tcp_conn->in.hdr, NULL, 0);
 		spin_unlock(&conn->session->lock);
 	}
@@ -769,7 +779,7 @@ iscsi_data_recv(struct iscsi_conn *conn)
 		break;
 	case ISCSI_OP_SCSI_CMD_RSP:
 		spin_lock(&conn->session->lock);
-		__iscsi_ctask_cleanup(conn, tcp_conn->in.ctask);
+		iscsi_tcp_cleanup_ctask(conn, tcp_conn->in.ctask);
 		spin_unlock(&conn->session->lock);
 	case ISCSI_OP_TEXT_RSP:
 	case ISCSI_OP_LOGIN_RSP:
@@ -1308,7 +1318,7 @@ iscsi_tcp_cmd_init(struct iscsi_cmd_task *ctask)
 				    ctask->imm_count -
 				    ctask->unsol_count;
 
-		debug_scsi("cmd [itt %x total %d imm %d imm_data %d "
+		debug_scsi("cmd [itt 0x%x total %d imm %d imm_data %d "
 			   "r2t_data %d]\n",
 			   ctask->itt, ctask->total_length, ctask->imm_count,
 			   ctask->unsol_count, tcp_ctask->r2t_data_count);
@@ -1636,7 +1646,7 @@ handle_xmstate_sol_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 	}
 solicit_again:
 	/*
-	 * send Data-Out whitnin this R2T sequence.
+	 * send Data-Out within this R2T sequence.
 	 */
 	if (!r2t->data_count)
 		goto data_out_done;
@@ -1731,7 +1741,7 @@ handle_xmstate_w_pad(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	struct iscsi_data_task *dtask = tcp_ctask->dtask;
-	int sent, rc;
+	int sent = 0, rc;
 
 	tcp_ctask->xmstate &= ~XMSTATE_W_PAD;
 	iscsi_buf_init_iov(&tcp_ctask->sendbuf, (char*)&tcp_ctask->pad,
@@ -2002,20 +2012,6 @@ iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session,
 }
 
 static void
-iscsi_tcp_cleanup_ctask(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
-{
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-	struct iscsi_r2t_info *r2t;
-
-	/* flush ctask's r2t queues */
-	while (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*)))
-		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
-			    sizeof(void*));
-
-	__iscsi_ctask_cleanup(conn, ctask);
-}
-
-static void
 iscsi_tcp_suspend_conn_rx(struct iscsi_conn *conn)
 {
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
@@ -2057,6 +2053,7 @@ iscsi_tcp_mgmt_init(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask,
 	iscsi_buf_init_iov(&tcp_mtask->headbuf, (char*)mtask->hdr,
 			   sizeof(struct iscsi_hdr));
 	tcp_mtask->xmstate = XMSTATE_IMM_HDR;
+	tcp_mtask->sent = 0;
 
 	if (mtask->data_count)
 		iscsi_buf_init_iov(&tcp_mtask->sendbuf, (char*)mtask->data,
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 7e6e031..1a8cd20 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -189,6 +189,7 @@ static void iscsi_complete_command(struct iscsi_session *session,
 {
 	struct scsi_cmnd *sc = ctask->sc;
 
+	ctask->state = ISCSI_TASK_COMPLETED;
 	ctask->sc = NULL;
 	list_del_init(&ctask->running);
 	__kfifo_put(session->cmdpool.queue, (void*)&ctask, sizeof(void*));
@@ -568,20 +569,24 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
 	}
 
 	/* process command queue */
-	while (__kfifo_get(conn->xmitqueue, (void*)&conn->ctask,
-			   sizeof(void*))) {
+	spin_lock_bh(&conn->session->lock);
+	while (!list_empty(&conn->xmitqueue)) {
 		/*
 		 * iscsi tcp may readd the task to the xmitqueue to send
 		 * write data
 		 */
-		spin_lock_bh(&conn->session->lock);
-		if (list_empty(&conn->ctask->running))
-			list_add_tail(&conn->ctask->running, &conn->run_list);
+		conn->ctask = list_entry(conn->xmitqueue.next,
+					 struct iscsi_cmd_task, running);
+		conn->ctask->state = ISCSI_TASK_RUNNING;
+		list_move_tail(conn->xmitqueue.next, &conn->run_list);
 		spin_unlock_bh(&conn->session->lock);
+
 		rc = tt->xmit_cmd_task(conn, conn->ctask);
 		if (rc)
 			goto again;
+		spin_lock_bh(&conn->session->lock);
 	}
+	spin_unlock_bh(&conn->session->lock);
 	/* done with this ctask */
 	conn->ctask = NULL;
 
@@ -691,6 +696,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 	sc->SCp.phase = session->age;
 	sc->SCp.ptr = (char *)ctask;
 
+	ctask->state = ISCSI_TASK_PENDING;
 	ctask->mtask = NULL;
 	ctask->conn = conn;
 	ctask->sc = sc;
@@ -700,7 +706,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 
 	session->tt->init_cmd_task(ctask);
 
-	__kfifo_put(conn->xmitqueue, (void*)&ctask, sizeof(void*));
+	list_add_tail(&ctask->running, &conn->xmitqueue);
 	debug_scsi(
 	       "ctask enq [%s cid %d sc %lx itt 0x%x len %d cmdsn %d win %d]\n",
 		sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read",
@@ -977,31 +983,27 @@ static int iscsi_exec_abort_task(struct scsi_cmnd *sc,
 /*
  * xmit mutex and session lock must be held
  */
-#define iscsi_remove_task(tasktype)					\
-static struct iscsi_##tasktype *					\
-iscsi_remove_##tasktype(struct kfifo *fifo, uint32_t itt)		\
-{									\
-	int i, nr_tasks = __kfifo_len(fifo) / sizeof(void*);		\
-	struct iscsi_##tasktype *task;					\
-									\
-	debug_scsi("searching %d tasks\n", nr_tasks);			\
-									\
-	for (i = 0; i < nr_tasks; i++) {				\
-		__kfifo_get(fifo, (void*)&task, sizeof(void*));		\
-		debug_scsi("check task %u\n", task->itt);		\
-									\
-		if (task->itt == itt) {					\
-			debug_scsi("matched task\n");			\
-			return task;					\
-		}							\
-									\
-		__kfifo_put(fifo, (void*)&task, sizeof(void*));		\
-	}								\
-	return NULL;							\
-}
+static struct iscsi_mgmt_task *
+iscsi_remove_mgmt_task(struct kfifo *fifo, uint32_t itt)
+{
+	int i, nr_tasks = __kfifo_len(fifo) / sizeof(void*);
+	struct iscsi_mgmt_task *task;
+
+	debug_scsi("searching %d tasks\n", nr_tasks);
+
+	for (i = 0; i < nr_tasks; i++) {
+		__kfifo_get(fifo, (void*)&task, sizeof(void*));
+		debug_scsi("check task %u\n", task->itt);
+
+		if (task->itt == itt) {
+			debug_scsi("matched task\n");
+			return task;
+		}
 
-iscsi_remove_task(mgmt_task);
-iscsi_remove_task(cmd_task);
+		__kfifo_put(fifo, (void*)&task, sizeof(void*));
+	}
+	return NULL;
+}
 
 static int iscsi_ctask_mtask_cleanup(struct iscsi_cmd_task *ctask)
 {
@@ -1043,7 +1045,6 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 	struct iscsi_cmd_task *ctask = (struct iscsi_cmd_task *)sc->SCp.ptr;
 	struct iscsi_conn *conn = ctask->conn;
 	struct iscsi_session *session = conn->session;
-	struct iscsi_cmd_task *pending_ctask;
 	int rc;
 
 	conn->eh_abort_cnt++;
@@ -1071,17 +1072,8 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 		goto failed;
 	}
 
-	/* check for the easy pending cmd abort */
-	pending_ctask = iscsi_remove_cmd_task(conn->xmitqueue, ctask->itt);
-	if (pending_ctask) {
-		/* iscsi_tcp queues write transfers on the xmitqueue */
-		if (list_empty(&pending_ctask->running)) {
-			debug_scsi("found pending task\n");
-			goto success;
-		} else
-			__kfifo_put(conn->xmitqueue, (void*)&pending_ctask,
-				    sizeof(void*));
-	}
+	if (ctask->state == ISCSI_TASK_PENDING)
+		goto success;
 
 	conn->tmabort_state = TMABORT_INITIAL;
 
@@ -1263,6 +1255,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit,
 		if (cmd_task_size)
 			ctask->dd_data = &ctask[1];
 		ctask->itt = cmd_i;
+		INIT_LIST_HEAD(&ctask->running);
 	}
 
 	spin_lock_init(&session->lock);
@@ -1282,6 +1275,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit,
 		if (mgmt_task_size)
 			mtask->dd_data = &mtask[1];
 		mtask->itt = ISCSI_MGMT_ITT_OFFSET + cmd_i;
+		INIT_LIST_HEAD(&mtask->running);
 	}
 
 	if (scsi_add_host(shost, NULL))
@@ -1361,12 +1355,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
 	conn->tmabort_state = TMABORT_INITIAL;
 	INIT_LIST_HEAD(&conn->run_list);
 	INIT_LIST_HEAD(&conn->mgmt_run_list);
-
-	/* initialize general xmit PDU commands queue */
-	conn->xmitqueue = kfifo_alloc(session->cmds_max * sizeof(void*),
-					GFP_KERNEL, NULL);
-	if (conn->xmitqueue == ERR_PTR(-ENOMEM))
-		goto xmitqueue_alloc_fail;
+	INIT_LIST_HEAD(&conn->xmitqueue);
 
 	/* initialize general immediate & non-immediate PDU commands queue */
 	conn->immqueue = kfifo_alloc(session->mgmtpool_max * sizeof(void*),
@@ -1410,8 +1399,6 @@ login_mtask_alloc_fail:
 mgmtqueue_alloc_fail:
 	kfifo_free(conn->immqueue);
 immqueue_alloc_fail:
-	kfifo_free(conn->xmitqueue);
-xmitqueue_alloc_fail:
 	iscsi_destroy_conn(cls_conn);
 	return NULL;
 }
@@ -1489,7 +1476,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 		session->cmdsn = session->max_cmdsn = session->exp_cmdsn = 1;
 	spin_unlock_bh(&session->lock);
 
-	kfifo_free(conn->xmitqueue);
 	kfifo_free(conn->immqueue);
 	kfifo_free(conn->mgmtqueue);
 
@@ -1572,7 +1558,7 @@ static void fail_all_commands(struct iscsi_conn *conn)
 	struct iscsi_cmd_task *ctask, *tmp;
 
 	/* flush pending */
-	while (__kfifo_get(conn->xmitqueue, (void*)&ctask, sizeof(void*))) {
+	list_for_each_entry_safe(ctask, tmp, &conn->xmitqueue, running) {
 		debug_scsi("failing pending sc %p itt 0x%x\n", ctask->sc,
 			   ctask->itt);
 		fail_command(conn, ctask, DID_BUS_BUSY << 16);
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index ba27608..e71d6e9 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -83,6 +83,12 @@ struct iscsi_mgmt_task {
 	struct list_head	running;
 };
 
+enum {
+	ISCSI_TASK_COMPLETED,
+	ISCSI_TASK_PENDING,
+	ISCSI_TASK_RUNNING,
+};
+
 struct iscsi_cmd_task {
 	/*
 	 * Becuae LLDs allocate their hdr differently, this is a pointer to
@@ -101,6 +107,8 @@ struct iscsi_cmd_task {
 	struct iscsi_conn	*conn;		/* used connection    */
 	struct iscsi_mgmt_task	*mtask;		/* tmf mtask in progr */
 
+	/* state set/tested under session->lock */
+	int			state;
 	struct list_head	running;	/* running cmd list */
 	void			*dd_data;	/* driver/transport data */
 };
@@ -134,7 +142,7 @@ struct iscsi_conn {
 	struct kfifo		*immqueue;	/* immediate xmit queue */
 	struct kfifo		*mgmtqueue;	/* mgmt (control) xmit queue */
 	struct list_head	mgmt_run_list;	/* list of control tasks */
-	struct kfifo		*xmitqueue;	/* data-path cmd queue */
+	struct list_head	xmitqueue;	/* data-path cmd queue */
 	struct list_head	run_list;	/* list of cmds in progress */
 	struct work_struct	xmitwork;	/* per-conn. xmit workqueue */
 	/*
-- 
cgit v0.10.2


From 275fd7d129fdd16d1dc0ec6a8d60bd6b72a76e31 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Mon, 24 Jul 2006 15:47:17 -0500
Subject: [SCSI] iscsi bugfixes: handle data rsp errors

If iscsi_data_rsp fails we must bail out. Since the pdu values like
data length are invalid we cannot continue to process the data since
it could overrun buffers.

This fixes a bug with cisco 5428s where that target is sending
too much data.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index ab324d9..7d78459 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -486,6 +486,8 @@ iscsi_tcp_hdr_recv(struct iscsi_conn *conn)
 	case ISCSI_OP_SCSI_DATA_IN:
 		tcp_conn->in.ctask = session->cmds[itt];
 		rc = iscsi_data_rsp(conn, tcp_conn->in.ctask);
+		if (rc)
+			return rc;
 		/* fall through */
 	case ISCSI_OP_SCSI_CMD_RSP:
 		tcp_conn->in.ctask = session->cmds[itt];
@@ -532,7 +534,7 @@ copy_hdr:
 	 * skbs to complete the command then we have to copy the header
 	 * for later use
 	 */
-	if (tcp_conn->in.zero_copy_hdr && tcp_conn->in.copy <
+	if (tcp_conn->in.zero_copy_hdr && tcp_conn->in.copy <=
 	   (tcp_conn->in.datalen + tcp_conn->in.padding +
 	    (conn->datadgst_en ? 4 : 0))) {
 		debug_tcp("Copying header for later use. in.copy %d in.datalen"
-- 
cgit v0.10.2


From 7ea8b82847293c2311cf08fc3ed31ab0e452a27e Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Mon, 24 Jul 2006 15:47:22 -0500
Subject: [SCSI] iscsi bugfixes: fix abort handling

Abort handler fixes.

If a connection is dropped and reconnected while an abort is
running then we should assume the recovery code will clean up
the abort. Not doing so causes an oops.

And if a command completes and then we get the status for the abort,
we do not need to call into the LLD to clean up the resources. Doing this
causes an oops in iser because it ends up freeing some resources twice.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 1a8cd20..4e2ca8f 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -276,6 +276,25 @@ out:
 	return rc;
 }
 
+static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
+{
+	struct iscsi_tm_rsp *tmf = (struct iscsi_tm_rsp *)hdr;
+
+	conn->exp_statsn = be32_to_cpu(hdr->statsn) + 1;
+	conn->tmfrsp_pdus_cnt++;
+
+	if (conn->tmabort_state != TMABORT_INITIAL)
+		return;
+
+	if (tmf->response == ISCSI_TMF_RSP_COMPLETE)
+		conn->tmabort_state = TMABORT_SUCCESS;
+	else if (tmf->response == ISCSI_TMF_RSP_NO_TASK)
+		conn->tmabort_state = TMABORT_NOT_FOUND;
+	else
+		conn->tmabort_state = TMABORT_FAILED;
+	wake_up(&conn->ehwait);
+}
+
 /**
  * __iscsi_complete_pdu - complete pdu
  * @conn: iscsi conn
@@ -361,16 +380,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 				break;
 			}
 
-			conn->exp_statsn = be32_to_cpu(hdr->statsn) + 1;
-			conn->tmfrsp_pdus_cnt++;
-			if (conn->tmabort_state == TMABORT_INITIAL) {
-				conn->tmabort_state =
-					((struct iscsi_tm_rsp *)hdr)->
-					response == ISCSI_TMF_RSP_COMPLETE ?
-						TMABORT_SUCCESS:TMABORT_FAILED;
-				/* unblock eh_abort() */
-				wake_up(&conn->ehwait);
-			}
+			iscsi_tmf_rsp(conn, hdr);
 			break;
 		case ISCSI_OP_NOOP_IN:
 			if (hdr->ttt != ISCSI_RESERVED_TAG) {
@@ -1029,12 +1039,13 @@ static void fail_command(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
 {
 	struct scsi_cmnd *sc;
 
-	conn->session->tt->cleanup_cmd_task(conn, ctask);
-	iscsi_ctask_mtask_cleanup(ctask);
-
 	sc = ctask->sc;
 	if (!sc)
 		return;
+
+	conn->session->tt->cleanup_cmd_task(conn, ctask);
+	iscsi_ctask_mtask_cleanup(ctask);
+
 	sc->result = err;
 	sc->resid = sc->request_bufflen;
 	iscsi_complete_command(conn->session, ctask);
@@ -1062,8 +1073,11 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 		goto failed;
 
 	/* ctask completed before time out */
-	if (!ctask->sc)
-		goto success;
+	if (!ctask->sc) {
+		spin_unlock_bh(&session->lock);
+		debug_scsi("sc completed while abort in progress\n");
+		goto success_rel_mutex;
+	}
 
 	/* what should we do here ? */
 	if (conn->ctask == ctask) {
@@ -1073,7 +1087,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 	}
 
 	if (ctask->state == ISCSI_TASK_PENDING)
-		goto success;
+		goto success_cleanup;
 
 	conn->tmabort_state = TMABORT_INITIAL;
 
@@ -1081,25 +1095,31 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 	rc = iscsi_exec_abort_task(sc, ctask);
 	spin_lock_bh(&session->lock);
 
-	iscsi_ctask_mtask_cleanup(ctask);
 	if (rc || sc->SCp.phase != session->age ||
 	    session->state != ISCSI_STATE_LOGGED_IN)
 		goto failed;
+	iscsi_ctask_mtask_cleanup(ctask);
 
-	/* ctask completed before tmf abort response */
-	if (!ctask->sc) {
-		debug_scsi("sc completed while abort in progress\n");
-		goto success;
-	}
-
-	if (conn->tmabort_state != TMABORT_SUCCESS) {
+	switch (conn->tmabort_state) {
+	case TMABORT_SUCCESS:
+		goto success_cleanup;
+	case TMABORT_NOT_FOUND:
+		if (!ctask->sc) {
+			/* ctask completed before tmf abort response */
+			spin_unlock_bh(&session->lock);
+			debug_scsi("sc completed while abort in progress\n");
+			goto success_rel_mutex;
+		}
+		/* fall through */
+	default:
+		/* timedout or failed */
 		spin_unlock_bh(&session->lock);
 		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
 		spin_lock_bh(&session->lock);
 		goto failed;
 	}
 
-success:
+success_cleanup:
 	debug_scsi("abort success [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
 	spin_unlock_bh(&session->lock);
 
@@ -1113,6 +1133,7 @@ success:
 	spin_unlock(&session->lock);
 	write_unlock_bh(conn->recv_lock);
 
+success_rel_mutex:
 	mutex_unlock(&conn->xmitmutex);
 	return SUCCESS;
 
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index e71d6e9..3f69f7e 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -60,6 +60,7 @@ struct iscsi_nopin;
 #define TMABORT_SUCCESS			0x1
 #define TMABORT_FAILED			0x2
 #define TMABORT_TIMEDOUT		0x3
+#define TMABORT_NOT_FOUND		0x4
 
 /* Connection suspend "bit" */
 #define ISCSI_SUSPEND_BIT		1
-- 
cgit v0.10.2


From 1c83469d36a9dd30dbf1fb9fc5ca3be3a0e64ff4 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Mon, 24 Jul 2006 15:47:26 -0500
Subject: [SCSI] iscsi bugfixes: fix oops when iser is flushing io

When we enter recovery and flush the running commands
we cannot free the connection before flushing the commands.
Some commands may have a reference to the connection
that needs to be released first. iscsi_stop was forcing
the term and suspend too early and was causing an oops
in iser, so this patch removes those callbacks altogether
and allows the LLD to handle that detail.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 34b0da5..1437d7e 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -378,21 +378,6 @@ iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
 	return iser_conn_set_full_featured_mode(conn);
 }
 
-static void
-iscsi_iser_conn_terminate(struct iscsi_conn *conn)
-{
-	struct iscsi_iser_conn *iser_conn = conn->dd_data;
-	struct iser_conn *ib_conn = iser_conn->ib_conn;
-
-	BUG_ON(!ib_conn);
-	/* starts conn teardown process, waits until all previously   *
-	 * posted buffers get flushed, deallocates all conn resources */
-	iser_conn_terminate(ib_conn);
-	iser_conn->ib_conn = NULL;
-	conn->recv_lock = NULL;
-}
-
-
 static struct iscsi_transport iscsi_iser_transport;
 
 static struct iscsi_cls_session *
@@ -555,13 +540,13 @@ iscsi_iser_ep_poll(__u64 ep_handle, int timeout_ms)
 static void
 iscsi_iser_ep_disconnect(__u64 ep_handle)
 {
-	struct iser_conn *ib_conn = iscsi_iser_ib_conn_lookup(ep_handle);
+	struct iser_conn *ib_conn;
 
+	ib_conn = iscsi_iser_ib_conn_lookup(ep_handle);
 	if (!ib_conn)
 		return;
 
 	iser_err("ib conn %p state %d\n",ib_conn, ib_conn->state);
-
 	iser_conn_terminate(ib_conn);
 }
 
@@ -614,9 +599,6 @@ static struct iscsi_transport iscsi_iser_transport = {
 	.get_session_param	= iscsi_session_get_param,
 	.start_conn             = iscsi_iser_conn_start,
 	.stop_conn              = iscsi_conn_stop,
-	/* these are called as part of conn recovery */
-	.suspend_conn_recv	= NULL, /* FIXME is/how this relvant to iser? */
-	.terminate_conn		= iscsi_iser_conn_terminate,
 	/* IO */
 	.send_pdu		= iscsi_conn_send_pdu,
 	.get_stats		= iscsi_iser_conn_get_stats,
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 7d78459..b6c68be 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -1040,9 +1040,8 @@ iscsi_conn_set_callbacks(struct iscsi_conn *conn)
 }
 
 static void
-iscsi_conn_restore_callbacks(struct iscsi_conn *conn)
+iscsi_conn_restore_callbacks(struct iscsi_tcp_conn *tcp_conn)
 {
-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	struct sock *sk = tcp_conn->sock->sk;
 
 	/* restore socket callbacks, see also: iscsi_conn_set_callbacks() */
@@ -1933,6 +1932,23 @@ tcp_conn_alloc_fail:
 }
 
 static void
+iscsi_tcp_release_conn(struct iscsi_conn *conn)
+{
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+
+	if (!tcp_conn->sock)
+		return;
+
+	sock_hold(tcp_conn->sock->sk);
+	iscsi_conn_restore_callbacks(tcp_conn);
+	sock_put(tcp_conn->sock->sk);
+
+	sock_release(tcp_conn->sock);
+	tcp_conn->sock = NULL;
+	conn->recv_lock = NULL;
+}
+
+static void
 iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
 {
 	struct iscsi_conn *conn = cls_conn->dd_data;
@@ -1942,6 +1958,7 @@ iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
 	if (conn->hdrdgst_en || conn->datadgst_en)
 		digest = 1;
 
+	iscsi_tcp_release_conn(conn);
 	iscsi_conn_teardown(cls_conn);
 
 	/* now free tcp_conn */
@@ -1965,6 +1982,15 @@ iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
 	kfree(tcp_conn);
 }
 
+static void
+iscsi_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+
+	iscsi_conn_stop(cls_conn, flag);
+	iscsi_tcp_release_conn(conn);
+}
+
 static int
 iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session,
 		    struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
@@ -2013,38 +2039,6 @@ iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session,
 	return 0;
 }
 
-static void
-iscsi_tcp_suspend_conn_rx(struct iscsi_conn *conn)
-{
-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-	struct sock *sk;
-
-	if (!tcp_conn->sock)
-		return;
-
-	sk = tcp_conn->sock->sk;
-	write_lock_bh(&sk->sk_callback_lock);
-	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
-	write_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void
-iscsi_tcp_terminate_conn(struct iscsi_conn *conn)
-{
-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-
-	if (!tcp_conn->sock)
-		return;
-
-	sock_hold(tcp_conn->sock->sk);
-	iscsi_conn_restore_callbacks(conn);
-	sock_put(tcp_conn->sock->sk);
-
-	sock_release(tcp_conn->sock);
-	tcp_conn->sock = NULL;
-	conn->recv_lock = NULL;
-}
-
 /* called with host lock */
 static void
 iscsi_tcp_mgmt_init(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask,
@@ -2413,10 +2407,7 @@ static struct iscsi_transport iscsi_tcp_transport = {
 	.get_conn_param		= iscsi_tcp_conn_get_param,
 	.get_session_param	= iscsi_session_get_param,
 	.start_conn		= iscsi_conn_start,
-	.stop_conn		= iscsi_conn_stop,
-	/* these are called as part of conn recovery */
-	.suspend_conn_recv	= iscsi_tcp_suspend_conn_rx,
-	.terminate_conn		= iscsi_tcp_terminate_conn,
+	.stop_conn		= iscsi_tcp_conn_stop,
 	/* IO */
 	.send_pdu		= iscsi_conn_send_pdu,
 	.get_stats		= iscsi_conn_get_stats,
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 4e2ca8f..36f520b 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1440,12 +1440,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 
 	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
 	mutex_lock(&conn->xmitmutex);
-	if (conn->c_stage == ISCSI_CONN_INITIAL_STAGE) {
-		if (session->tt->suspend_conn_recv)
-			session->tt->suspend_conn_recv(conn);
-
-		session->tt->terminate_conn(conn);
-	}
 
 	spin_lock_bh(&session->lock);
 	conn->c_stage = ISCSI_CONN_CLEANUP_WAIT;
@@ -1622,8 +1616,9 @@ static void iscsi_start_session_recovery(struct iscsi_session *session,
 	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
 	spin_unlock_bh(&session->lock);
 
-	if (session->tt->suspend_conn_recv)
-		session->tt->suspend_conn_recv(conn);
+	write_lock_bh(conn->recv_lock);
+	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
+	write_unlock_bh(conn->recv_lock);
 
 	mutex_lock(&conn->xmitmutex);
 	/*
@@ -1642,7 +1637,6 @@ static void iscsi_start_session_recovery(struct iscsi_session *session,
 		}
 	}
 
-	session->tt->terminate_conn(conn);
 	/*
 	 * flush queues.
 	 */
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 5a3df1d..39e8332 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -57,8 +57,6 @@ struct sockaddr;
  * @stop_conn:		suspend/recover/terminate connection
  * @send_pdu:		send iSCSI PDU, Login, Logout, NOP-Out, Reject, Text.
  * @session_recovery_timedout: notify LLD a block during recovery timed out
- * @suspend_conn_recv:	susepend the recv side of the connection
- * @termincate_conn:	destroy socket connection. Called with mutex lock.
  * @init_cmd_task:	Initialize a iscsi_cmd_task and any internal structs.
  *			Called from queuecommand with session lock held.
  * @init_mgmt_task:	Initialize a iscsi_mgmt_task and any internal structs.
@@ -112,8 +110,6 @@ struct iscsi_transport {
 			 char *data, uint32_t data_size);
 	void (*get_stats) (struct iscsi_cls_conn *conn,
 			   struct iscsi_stats *stats);
-	void (*suspend_conn_recv) (struct iscsi_conn *conn);
-	void (*terminate_conn) (struct iscsi_conn *conn);
 	void (*init_cmd_task) (struct iscsi_cmd_task *ctask);
 	void (*init_mgmt_task) (struct iscsi_conn *conn,
 				struct iscsi_mgmt_task *mtask,
-- 
cgit v0.10.2


From 63f75cc8a7e6ce453e38a7b90cdcae83d63f1ea7 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Mon, 24 Jul 2006 15:47:29 -0500
Subject: [SCSI] iscsi bugfixes: fix oops when removing session

We are touching the cls_session after we have freed
it. This causes an oops.

Signed-off-by: Or Gerlitz  <ogerlitz@voltaire.com>
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 36f520b..c989bc6 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1337,6 +1337,7 @@ void iscsi_session_teardown(struct iscsi_cls_session *cls_session)
 {
 	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
 	struct iscsi_session *session = iscsi_hostdata(shost->hostdata);
+	struct module *owner = cls_session->transport->owner;
 
 	scsi_remove_host(shost);
 
@@ -1345,7 +1346,7 @@ void iscsi_session_teardown(struct iscsi_cls_session *cls_session)
 
 	iscsi_destroy_session(cls_session);
 	scsi_host_put(shost);
-	module_put(cls_session->transport->owner);
+	module_put(owner);
 }
 EXPORT_SYMBOL_GPL(iscsi_session_teardown);
 
-- 
cgit v0.10.2


From 9aaa2b4621280b6de1ecfb6dd7cd5cbe59fd1264 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Mon, 24 Jul 2006 15:47:34 -0500
Subject: [SCSI] iscsi bugfixes: dont use GFP_KERNEL for sending errors

iscsi_tcp can send error events from soft irq context so we
cannot use GFP_KERNEL.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 7b9e8fa..30a47c1 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -634,13 +634,13 @@ mempool_zone_get_skb(struct mempool_zone *zone)
 }
 
 static int
-iscsi_broadcast_skb(struct mempool_zone *zone, struct sk_buff *skb)
+iscsi_broadcast_skb(struct mempool_zone *zone, struct sk_buff *skb, gfp_t gfp)
 {
 	unsigned long flags;
 	int rc;
 
 	skb_get(skb);
-	rc = netlink_broadcast(nls, skb, 0, 1, GFP_KERNEL);
+	rc = netlink_broadcast(nls, skb, 0, 1, gfp);
 	if (rc < 0) {
 		mempool_free(skb, zone->pool);
 		printk(KERN_ERR "iscsi: can not broadcast skb (%d)\n", rc);
@@ -749,7 +749,7 @@ void iscsi_conn_error(struct iscsi_cls_conn *conn, enum iscsi_err error)
 	ev->r.connerror.cid = conn->cid;
 	ev->r.connerror.sid = iscsi_conn_get_sid(conn);
 
-	iscsi_broadcast_skb(conn->z_error, skb);
+	iscsi_broadcast_skb(conn->z_error, skb, GFP_ATOMIC);
 
 	dev_printk(KERN_INFO, &conn->dev, "iscsi: detected conn error (%d)\n",
 		   error);
@@ -895,7 +895,7 @@ int iscsi_if_destroy_session_done(struct iscsi_cls_conn *conn)
 	 * this will occur if the daemon is not up, so we just warn
 	 * the user and when the daemon is restarted it will handle it
 	 */
-	rc = iscsi_broadcast_skb(conn->z_pdu, skb);
+	rc = iscsi_broadcast_skb(conn->z_pdu, skb, GFP_KERNEL);
 	if (rc < 0)
 		dev_printk(KERN_ERR, &conn->dev, "Cannot notify userspace of "
 			  "session destruction event. Check iscsi daemon\n");
@@ -958,7 +958,7 @@ int iscsi_if_create_session_done(struct iscsi_cls_conn *conn)
 	 * this will occur if the daemon is not up, so we just warn
 	 * the user and when the daemon is restarted it will handle it
 	 */
-	rc = iscsi_broadcast_skb(conn->z_pdu, skb);
+	rc = iscsi_broadcast_skb(conn->z_pdu, skb, GFP_KERNEL);
 	if (rc < 0)
 		dev_printk(KERN_ERR, &conn->dev, "Cannot notify userspace of "
 			  "session creation event. Check iscsi daemon\n");
-- 
cgit v0.10.2


From c8dc1e523b0f1e6dd71cdabd8c7d7587c6dc27f9 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Mon, 24 Jul 2006 15:47:39 -0500
Subject: [SCSI] iscsi bugfixes: reduce memory allocations

We currently try to allocate a max_recv_data_segment_length
which can be very large (default is 64K), and common uses
are up to 1MB. It is very very difficult to allocate this
much contiguous memory and it turns out we never even use it.
We really only need a couple of pages, so this patch has us
allocate just what we know we need today.

Later if vendors start adding vendor specific data and
we need to handle large buffers we can do this, but for
the last 4 years we have not seen anyone do this or request
it.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index b6c68be..aa20adc 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -511,13 +511,28 @@ iscsi_tcp_hdr_recv(struct iscsi_conn *conn)
 		break;
 	case ISCSI_OP_LOGIN_RSP:
 	case ISCSI_OP_TEXT_RSP:
-	case ISCSI_OP_LOGOUT_RSP:
-	case ISCSI_OP_NOOP_IN:
 	case ISCSI_OP_REJECT:
 	case ISCSI_OP_ASYNC_EVENT:
+		/*
+		 * It is possible that we could get a PDU with a buffer larger
+		 * than 8K, but there are no targets that currently do this.
+		 * For now we fail until we find a vendor that needs it
+		 */
+		if (DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH <
+		    tcp_conn->in.datalen) {
+			printk(KERN_ERR "iscsi_tcp: received buffer of len %u "
+			      "but conn buffer is only %u (opcode %0x)\n",
+			      tcp_conn->in.datalen,
+			      DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH, opcode);
+			rc = ISCSI_ERR_PROTO;
+			break;
+		}
+
 		if (tcp_conn->in.datalen)
 			goto copy_hdr;
 	/* fall through */
+	case ISCSI_OP_LOGOUT_RSP:
+	case ISCSI_OP_NOOP_IN:
 	case ISCSI_OP_SCSI_TMFUNC_RSP:
 		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
 		break;
@@ -625,9 +640,9 @@ iscsi_ctask_copy(struct iscsi_tcp_conn *tcp_conn, struct iscsi_cmd_task *ctask,
  *	byte counters.
  **/
 static inline int
-iscsi_tcp_copy(struct iscsi_tcp_conn *tcp_conn)
+iscsi_tcp_copy(struct iscsi_conn *conn)
 {
-	void *buf = tcp_conn->data;
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	int buf_size = tcp_conn->in.datalen;
 	int buf_left = buf_size - tcp_conn->data_copied;
 	int size = min(tcp_conn->in.copy, buf_left);
@@ -638,7 +653,7 @@ iscsi_tcp_copy(struct iscsi_tcp_conn *tcp_conn)
 	BUG_ON(size <= 0);
 
 	rc = skb_copy_bits(tcp_conn->in.skb, tcp_conn->in.offset,
-			   (char*)buf + tcp_conn->data_copied, size);
+			   (char*)conn->data + tcp_conn->data_copied, size);
 	BUG_ON(rc);
 
 	tcp_conn->in.offset += size;
@@ -785,22 +800,21 @@ iscsi_data_recv(struct iscsi_conn *conn)
 		spin_unlock(&conn->session->lock);
 	case ISCSI_OP_TEXT_RSP:
 	case ISCSI_OP_LOGIN_RSP:
-	case ISCSI_OP_NOOP_IN:
 	case ISCSI_OP_ASYNC_EVENT:
 	case ISCSI_OP_REJECT:
 		/*
 		 * Collect data segment to the connection's data
 		 * placeholder
 		 */
-		if (iscsi_tcp_copy(tcp_conn)) {
+		if (iscsi_tcp_copy(conn)) {
 			rc = -EAGAIN;
 			goto exit;
 		}
 
-		rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr, tcp_conn->data,
+		rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr, conn->data,
 					tcp_conn->in.datalen);
 		if (!rc && conn->datadgst_en && opcode != ISCSI_OP_LOGIN_RSP)
-			iscsi_recv_digest_update(tcp_conn, tcp_conn->data,
+			iscsi_recv_digest_update(tcp_conn, conn->data,
 			  			tcp_conn->in.datalen);
 		break;
 	default:
@@ -1911,21 +1925,9 @@ iscsi_tcp_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
 	tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
 	/* initial operational parameters */
 	tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
-	tcp_conn->data_size = DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH;
-
-	/* allocate initial PDU receive place holder */
-	if (tcp_conn->data_size <= PAGE_SIZE)
-		tcp_conn->data = kmalloc(tcp_conn->data_size, GFP_KERNEL);
-	else
-		tcp_conn->data = (void*)__get_free_pages(GFP_KERNEL,
-					get_order(tcp_conn->data_size));
-	if (!tcp_conn->data)
-		goto max_recv_dlenght_alloc_fail;
 
 	return cls_conn;
 
-max_recv_dlenght_alloc_fail:
-	kfree(tcp_conn);
 tcp_conn_alloc_fail:
 	iscsi_conn_teardown(cls_conn);
 	return NULL;
@@ -1973,12 +1975,6 @@ iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
 			crypto_free_tfm(tcp_conn->data_rx_tfm);
 	}
 
-	/* free conn->data, size = MaxRecvDataSegmentLength */
-	if (tcp_conn->data_size <= PAGE_SIZE)
-		kfree(tcp_conn->data);
-	else
-		free_pages((unsigned long)tcp_conn->data,
-			   get_order(tcp_conn->data_size));
 	kfree(tcp_conn);
 }
 
@@ -2131,39 +2127,6 @@ iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
 	int value;
 
 	switch(param) {
-	case ISCSI_PARAM_MAX_RECV_DLENGTH: {
-		char *saveptr = tcp_conn->data;
-		gfp_t flags = GFP_KERNEL;
-
-		sscanf(buf, "%d", &value);
-		if (tcp_conn->data_size >= value) {
-			iscsi_set_param(cls_conn, param, buf, buflen);
-			break;
-		}
-
-		spin_lock_bh(&session->lock);
-		if (conn->stop_stage == STOP_CONN_RECOVER)
-			flags = GFP_ATOMIC;
-		spin_unlock_bh(&session->lock);
-
-		if (value <= PAGE_SIZE)
-			tcp_conn->data = kmalloc(value, flags);
-		else
-			tcp_conn->data = (void*)__get_free_pages(flags,
-							     get_order(value));
-		if (tcp_conn->data == NULL) {
-			tcp_conn->data = saveptr;
-			return -ENOMEM;
-		}
-		if (tcp_conn->data_size <= PAGE_SIZE)
-			kfree(saveptr);
-		else
-			free_pages((unsigned long)saveptr,
-				   get_order(tcp_conn->data_size));
-		iscsi_set_param(cls_conn, param, buf, buflen);
-		tcp_conn->data_size = value;
-		break;
-		}
 	case ISCSI_PARAM_HDRDGST_EN:
 		iscsi_set_param(cls_conn, param, buf, buflen);
 		tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
diff --git a/drivers/scsi/iscsi_tcp.h b/drivers/scsi/iscsi_tcp.h
index 8083028..6a4ee70 100644
--- a/drivers/scsi/iscsi_tcp.h
+++ b/drivers/scsi/iscsi_tcp.h
@@ -78,8 +78,6 @@ struct iscsi_tcp_conn {
 	char			hdrext[4*sizeof(__u16) +
 				    sizeof(__u32)];
 	int			data_copied;
-	char			*data;		/* data placeholder */
-	int			data_size;	/* actual recv_dlength */
 	int			stop_stage;	/* conn_stop() flag: *
 						 * stop to recover,  *
 						 * stop to terminate */
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index c989bc6..03b3dee 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -360,6 +360,10 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 
 		switch(opcode) {
 		case ISCSI_OP_LOGOUT_RSP:
+			if (datalen) {
+				rc = ISCSI_ERR_PROTO;
+				break;
+			}
 			conn->exp_statsn = be32_to_cpu(hdr->statsn) + 1;
 			/* fall through */
 		case ISCSI_OP_LOGIN_RSP:
@@ -383,7 +387,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 			iscsi_tmf_rsp(conn, hdr);
 			break;
 		case ISCSI_OP_NOOP_IN:
-			if (hdr->ttt != ISCSI_RESERVED_TAG) {
+			if (hdr->ttt != ISCSI_RESERVED_TAG || datalen) {
 				rc = ISCSI_ERR_PROTO;
 				break;
 			}
@@ -1405,7 +1409,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
 	data = kmalloc(DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH, GFP_KERNEL);
 	if (!data)
 		goto login_mtask_data_alloc_fail;
-	conn->login_mtask->data = data;
+	conn->login_mtask->data = conn->data = data;
 
 	init_timer(&conn->tmabort_timer);
 	mutex_init(&conn->xmitmutex);
@@ -1477,7 +1481,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 	}
 
 	spin_lock_bh(&session->lock);
-	kfree(conn->login_mtask->data);
+	kfree(conn->data);
 	__kfifo_put(session->mgmtpool.queue, (void*)&conn->login_mtask,
 		    sizeof(void*));
 	list_del(&conn->item);
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 3f69f7e..41904f6 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -135,6 +135,14 @@ struct iscsi_conn {
 	int			id;		/* CID */
 	struct list_head	item;		/* maintains list of conns */
 	int			c_stage;	/* connection state */
+	/*
+	 * Preallocated buffer for pdus that have data but do not
+	 * originate from scsi-ml. We never have two pdus using the
+	 * buffer at the same time. It is only allocated to
+	 * the default max recv size because the pdus we support
+	 * should always fit in this buffer
+	 */
+	char			*data;
 	struct iscsi_mgmt_task	*login_mtask;	/* mtask used for login/text */
 	struct iscsi_mgmt_task	*mtask;		/* xmit mtask in progress */
 	struct iscsi_cmd_task	*ctask;		/* xmit ctask in progress */
-- 
cgit v0.10.2


From 40527afea1a3b18ee5754e17d6f807176e03f1f2 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Mon, 24 Jul 2006 15:47:45 -0500
Subject: [SCSI] iscsi bugfixes: pass errors from complete_pdu to caller

Must pass ISCSI_ERR values from the recv path and propagate them
upwards.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index aa20adc..33534f6 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -885,7 +885,7 @@ more:
 			}
 			tcp_conn->in_progress = IN_PROGRESS_DATA_RECV;
 		} else if (rc) {
-			iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+			iscsi_conn_failure(conn, rc);
 			return 0;
 		}
 	}
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 03b3dee..c628c63 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -372,7 +372,8 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 			 * login related PDU's exp_statsn is handled in
 			 * userspace
 			 */
-			rc = iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen);
+			if (iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen))
+				rc = ISCSI_ERR_CONN_FAILED;
 			list_del(&mtask->running);
 			if (conn->login_mtask != mtask)
 				__kfifo_put(session->mgmtpool.queue,
@@ -393,7 +394,8 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 			}
 			conn->exp_statsn = be32_to_cpu(hdr->statsn) + 1;
 
-			rc = iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen);
+			if (iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen))
+				rc = ISCSI_ERR_CONN_FAILED;
 			list_del(&mtask->running);
 			if (conn->login_mtask != mtask)
 				__kfifo_put(session->mgmtpool.queue,
@@ -406,14 +408,21 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 	} else if (itt == ISCSI_RESERVED_TAG) {
 		switch(opcode) {
 		case ISCSI_OP_NOOP_IN:
-			if (!datalen) {
-				rc = iscsi_check_assign_cmdsn(session,
-						 (struct iscsi_nopin*)hdr);
-				if (!rc && hdr->ttt != ISCSI_RESERVED_TAG)
-					rc = iscsi_recv_pdu(conn->cls_conn,
-							    hdr, NULL, 0);
-			} else
+			if (datalen) {
 				rc = ISCSI_ERR_PROTO;
+				break;
+			}
+
+			rc = iscsi_check_assign_cmdsn(session,
+						 (struct iscsi_nopin*)hdr);
+			if (rc)
+				break;
+
+			if (hdr->ttt == ISCSI_RESERVED_TAG)
+				break;
+
+			if (iscsi_recv_pdu(conn->cls_conn, hdr, NULL, 0))
+				rc = ISCSI_ERR_CONN_FAILED;
 			break;
 		case ISCSI_OP_REJECT:
 			/* we need sth like iscsi_reject_rsp()*/
-- 
cgit v0.10.2


From f3ff0c3627df90aa28be98803b10289bb348e4e6 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Mon, 24 Jul 2006 15:47:50 -0500
Subject: [SCSI] iscsi bugfixes: fix mem leaks in libiscsi

We were leaking some strings. This patch just frees them.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index c628c63..5884cd2 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1357,6 +1357,8 @@ void iscsi_session_teardown(struct iscsi_cls_session *cls_session)
 	iscsi_pool_free(&session->mgmtpool, (void**)session->mgmt_cmds);
 	iscsi_pool_free(&session->cmdpool, (void**)session->cmds);
 
+	kfree(session->targetname);
+
 	iscsi_destroy_session(cls_session);
 	scsi_host_put(shost);
 	module_put(owner);
@@ -1491,6 +1493,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 
 	spin_lock_bh(&session->lock);
 	kfree(conn->data);
+	kfree(conn->persistent_address);
 	__kfifo_put(session->mgmtpool.queue, (void*)&conn->login_mtask,
 		    sizeof(void*));
 	list_del(&conn->item);
-- 
cgit v0.10.2


From f4246b33c774bb4330eb6286beed6d3c9f4373c4 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Mon, 24 Jul 2006 15:47:54 -0500
Subject: [SCSI] iscsi bugfixes: update and move version number

The version info is useful for iscsi tcp, iser and qla4xxx so move to
transport class.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 33534f6..058f094 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -43,13 +43,10 @@
 
 #include "iscsi_tcp.h"
 
-#define ISCSI_TCP_VERSION "1.0-595"
-
 MODULE_AUTHOR("Dmitry Yusupov <dmitry_yus@yahoo.com>, "
 	      "Alex Aizman <itn780@yahoo.com>");
 MODULE_DESCRIPTION("iSCSI/TCP data-path");
 MODULE_LICENSE("GPL");
-MODULE_VERSION(ISCSI_TCP_VERSION);
 /* #define DEBUG_TCP */
 #define DEBUG_ASSERT
 
@@ -2317,8 +2314,7 @@ static void iscsi_tcp_session_destroy(struct iscsi_cls_session *cls_session)
 }
 
 static struct scsi_host_template iscsi_sht = {
-	.name			= "iSCSI Initiator over TCP/IP, v"
-				  ISCSI_TCP_VERSION,
+	.name			= "iSCSI Initiator over TCP/IP",
 	.queuecommand           = iscsi_queuecommand,
 	.change_queue_depth	= iscsi_change_queue_depth,
 	.can_queue		= ISCSI_XMIT_CMDS_MAX - 1,
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 30a47c1..2ecd141 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -34,6 +34,7 @@
 #define ISCSI_SESSION_ATTRS 11
 #define ISCSI_CONN_ATTRS 11
 #define ISCSI_HOST_ATTRS 0
+#define ISCSI_TRANSPORT_VERSION "1.1-646"
 
 struct iscsi_internal {
 	int daemon_pid;
@@ -1613,6 +1614,9 @@ static __init int iscsi_transport_init(void)
 {
 	int err;
 
+	printk(KERN_INFO "Loading iSCSI transport class v%s.",
+		ISCSI_TRANSPORT_VERSION);
+
 	err = class_register(&iscsi_transport_class);
 	if (err)
 		return err;
@@ -1678,3 +1682,4 @@ MODULE_AUTHOR("Mike Christie <michaelc@cs.wisc.edu>, "
 	      "Alex Aizman <itn780@yahoo.com>");
 MODULE_DESCRIPTION("iSCSI Transport Interface");
 MODULE_LICENSE("GPL");
+MODULE_VERSION(ISCSI_TRANSPORT_VERSION);
-- 
cgit v0.10.2


From fbf6080225a03aa2b3671acacebdf615f1d3f6ba Mon Sep 17 00:00:00 2001
From: "Ju, Seokmann" <Seokmann.Ju@lsil.com>
Date: Tue, 25 Jul 2006 08:44:48 -0600
Subject: [SCSI] megaraid_{mm,mbox}: 64-bit DMA capability checker

This patch contains
- a fix for the 64-bit DMA capability check in the megaraid_{mm,mbox} driver.
- changes (falling back to the 32-bit DMA mask if setting the 64-bit DMA
mask fails) suggested by James for the previous patch.
- addition of SATA 150-4/6 as commented by Vasily Averin.

With this patch, the driver accesses PCI configuration space at a dedicated
offset to read a signature. If the signature is present, it means that the
controller has the capability to handle 64-bit DMA.
Without this patch, the driver used to blindly claim 64-bit DMA
capability.
The issue was reported by Vasily Averin [vvs@sw.ru].
Thank you, Vasily, for reporting it.

Signed-Off By: Seokmann Ju <seokmann.ju@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/Documentation/scsi/ChangeLog.megaraid b/Documentation/scsi/ChangeLog.megaraid
index c173806..fd8939e 100644
--- a/Documentation/scsi/ChangeLog.megaraid
+++ b/Documentation/scsi/ChangeLog.megaraid
@@ -1,3 +1,64 @@
+Release Date	: Fri May 19 09:31:45 EST 2006 - Seokmann Ju <sju@lsil.com>
+Current Version : 2.20.4.9 (scsi module), 2.20.2.6 (cmm module)
+Older Version	: 2.20.4.8 (scsi module), 2.20.2.6 (cmm module)
+
+1.	Fixed a bug in megaraid_init_mbox().
+	Customer reported "garbage in file on x86_64 platform".
+	Root Cause: the driver registered controllers as 64-bit DMA capable
+	for those which are not support it.
+	Fix: Made change in the function inserting identification machanism
+	identifying 64-bit DMA capable controllers.
+
+	> -----Original Message-----
+	> From: Vasily Averin [mailto:vvs@sw.ru]
+	> Sent: Thursday, May 04, 2006 2:49 PM
+	> To: linux-scsi@vger.kernel.org; Kolli, Neela; Mukker, Atul;
+	> Ju, Seokmann; Bagalkote, Sreenivas;
+	> James.Bottomley@SteelEye.com; devel@openvz.org
+	> Subject: megaraid_mbox: garbage in file
+	>
+	> Hello all,
+	>
+	> I've investigated customers claim on the unstable work of
+	> their node and found a
+	> strange effect: reading from some files leads to the
+	>  "attempt to access beyond end of device" messages.
+	>
+	> I've checked filesystem, memory on the node, motherboard BIOS
+	> version, but it
+	> does not help and issue still has been reproduced by simple
+	> file reading.
+	>
+	> Reproducer is simple:
+	>
+	> echo 0xffffffff >/proc/sys/dev/scsi/logging_level ;
+	> cat /vz/private/101/root/etc/ld.so.cache >/tmp/ttt  ;
+	> echo 0 >/proc/sys/dev/scsi/logging
+	>
+	> It leads to the following messages in dmesg
+	>
+	> sd_init_command: disk=sda, block=871769260, count=26
+	> sda : block=871769260
+	> sda : reading 26/26 512 byte blocks.
+	> scsi_add_timer: scmd: f79ed980, time: 7500, (c02b1420)
+	> sd 0:1:0:0: send 0xf79ed980                  sd 0:1:0:0:
+	>         command: Read (10): 28 00 33 f6 24 ac 00 00 1a 00
+	> buffer = 0xf7cfb540, bufflen = 13312, done = 0xc0366b40,
+	> queuecommand 0xc0344010
+	> leaving scsi_dispatch_cmnd()
+	> scsi_delete_timer: scmd: f79ed980, rtn: 1
+	> sd 0:1:0:0: done 0xf79ed980 SUCCESS        0 sd 0:1:0:0:
+	>         command: Read (10): 28 00 33 f6 24 ac 00 00 1a 00
+	> scsi host busy 1 failed 0
+	> sd 0:1:0:0: Notifying upper driver of completion (result 0)
+	> sd_rw_intr: sda: res=0x0
+	> 26 sectors total, 13312 bytes done.
+	> use_sg is 4
+	> attempt to access beyond end of device
+	> sda6: rw=0, want=1044134458, limit=951401367
+	> Buffer I/O error on device sda6, logical block 522067228
+	> attempt to access beyond end of device
+
 Release Date	: Mon Apr 11 12:27:22 EST 2006 - Seokmann Ju <sju@lsil.com>
 Current Version : 2.20.4.8 (scsi module), 2.20.2.6 (cmm module)
 Older Version	: 2.20.4.7 (scsi module), 2.20.2.6 (cmm module)
diff --git a/drivers/scsi/megaraid/mega_common.h b/drivers/scsi/megaraid/mega_common.h
index 4675343..12e3812 100644
--- a/drivers/scsi/megaraid/mega_common.h
+++ b/drivers/scsi/megaraid/mega_common.h
@@ -37,6 +37,9 @@
 #define LSI_MAX_CHANNELS		16
 #define LSI_MAX_LOGICAL_DRIVES_64LD	(64+1)
 
+#define HBA_SIGNATURE_64_BIT		0x299
+#define PCI_CONF_AMISIG64		0xa4
+
 
 /**
  * scb_t - scsi command control block
diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index 9271513..e671af0 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -10,7 +10,7 @@
  *	   2 of the License, or (at your option) any later version.
  *
  * FILE		: megaraid_mbox.c
- * Version	: v2.20.4.8 (Apr 11 2006)
+ * Version	: v2.20.4.9 (Jul 16 2006)
  *
  * Authors:
  * 	Atul Mukker		<Atul.Mukker@lsil.com>
@@ -720,6 +720,7 @@ megaraid_init_mbox(adapter_t *adapter)
 	struct pci_dev		*pdev;
 	mraid_device_t		*raid_dev;
 	int			i;
+	uint32_t		magic64;
 
 
 	adapter->ito	= MBOX_TIMEOUT;
@@ -863,12 +864,33 @@ megaraid_init_mbox(adapter_t *adapter)
 
 	// Set the DMA mask to 64-bit. All supported controllers as capable of
 	// DMA in this range
-	if (pci_set_dma_mask(adapter->pdev, DMA_64BIT_MASK) != 0) {
-
-		con_log(CL_ANN, (KERN_WARNING
-			"megaraid: could not set DMA mask for 64-bit.\n"));
+	pci_read_config_dword(adapter->pdev, PCI_CONF_AMISIG64, &magic64);
+
+	if (((magic64 == HBA_SIGNATURE_64_BIT) &&
+		((adapter->pdev->subsystem_device !=
+		PCI_SUBSYS_ID_MEGARAID_SATA_150_6) ||
+		(adapter->pdev->subsystem_device !=
+		PCI_SUBSYS_ID_MEGARAID_SATA_150_4))) ||
+		(adapter->pdev->vendor == PCI_VENDOR_ID_LSI_LOGIC &&
+		adapter->pdev->device == PCI_DEVICE_ID_VERDE) ||
+		(adapter->pdev->vendor == PCI_VENDOR_ID_LSI_LOGIC &&
+		adapter->pdev->device == PCI_DEVICE_ID_DOBSON) ||
+		(adapter->pdev->vendor == PCI_VENDOR_ID_LSI_LOGIC &&
+		adapter->pdev->device == PCI_DEVICE_ID_LINDSAY) ||
+		(adapter->pdev->vendor == PCI_VENDOR_ID_DELL &&
+		adapter->pdev->device == PCI_DEVICE_ID_PERC4_DI_EVERGLADES) ||
+		(adapter->pdev->vendor == PCI_VENDOR_ID_DELL &&
+		adapter->pdev->device == PCI_DEVICE_ID_PERC4E_DI_KOBUK)) {
+		if (pci_set_dma_mask(adapter->pdev, DMA_64BIT_MASK)) {
+			con_log(CL_ANN, (KERN_WARNING
+				"megaraid: DMA mask for 64-bit failed\n"));
 
-		goto out_free_sysfs_res;
+			if (pci_set_dma_mask (adapter->pdev, DMA_32BIT_MASK)) {
+				con_log(CL_ANN, (KERN_WARNING
+					"megaraid: 32-bit DMA mask failed\n"));
+				goto out_free_sysfs_res;
+			}
+		}
 	}
 
 	// setup tasklet for DPC
diff --git a/drivers/scsi/megaraid/megaraid_mbox.h b/drivers/scsi/megaraid/megaraid_mbox.h
index 868fb0e..2b5a328 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.h
+++ b/drivers/scsi/megaraid/megaraid_mbox.h
@@ -21,8 +21,8 @@
 #include "megaraid_ioctl.h"
 
 
-#define MEGARAID_VERSION	"2.20.4.8"
-#define MEGARAID_EXT_VERSION	"(Release Date: Mon Apr 11 12:27:22 EST 2006)"
+#define MEGARAID_VERSION	"2.20.4.9"
+#define MEGARAID_EXT_VERSION	"(Release Date: Sun Jul 16 12:27:22 EST 2006)"
 
 
 /*
-- 
cgit v0.10.2


From aa677bc7445147f663ebde69d248a30839bada76 Mon Sep 17 00:00:00 2001
From: "Ju, Seokmann" <Seokmann.Ju@lsil.com>
Date: Tue, 25 Jul 2006 08:44:58 -0600
Subject: [SCSI] megaraid_{mm,mbox}: a fix on INQUIRY with EVPD

With this patch, the driver will protect against data corruption caused by
INQUIRY with EVPD requests to megaraid controllers.  As specified in
the changelog, the megaraid F/W has already fixed the issue and is in
the process of being released. Meanwhile, the driver will protect the
system with this patch.

Signed-Off By: Seokmann Ju <seokmann.ju@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/Documentation/scsi/ChangeLog.megaraid b/Documentation/scsi/ChangeLog.megaraid
index fd8939e..0edb048 100644
--- a/Documentation/scsi/ChangeLog.megaraid
+++ b/Documentation/scsi/ChangeLog.megaraid
@@ -59,6 +59,13 @@ Older Version	: 2.20.4.8 (scsi module), 2.20.2.6 (cmm module)
 	> Buffer I/O error on device sda6, logical block 522067228
 	> attempt to access beyond end of device
 
+2.	When INQUIRY with EVPD bit set issued to the MegaRAID controller,
+	system memory gets corrupted.
+	Root Cause: MegaRAID F/W handle the INQUIRY with EVPD bit set
+	incorrectly.
+	Fix: MegaRAID F/W has fixed the problem and being process of release,
+	soon. Meanwhile, driver will filter out the request.
+
 Release Date	: Mon Apr 11 12:27:22 EST 2006 - Seokmann Ju <sju@lsil.com>
 Current Version : 2.20.4.8 (scsi module), 2.20.2.6 (cmm module)
 Older Version	: 2.20.4.7 (scsi module), 2.20.2.6 (cmm module)
diff --git a/drivers/scsi/megaraid/mega_common.h b/drivers/scsi/megaraid/mega_common.h
index 12e3812..8cd0bd1 100644
--- a/drivers/scsi/megaraid/mega_common.h
+++ b/drivers/scsi/megaraid/mega_common.h
@@ -40,6 +40,9 @@
 #define HBA_SIGNATURE_64_BIT		0x299
 #define PCI_CONF_AMISIG64		0xa4
 
+#define MEGA_SCSI_INQ_EVPD		1
+#define MEGA_INVALID_FIELD_IN_CDB	0x24
+
 
 /**
  * scb_t - scsi command control block
diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index e671af0..cd982c8 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -1644,6 +1644,14 @@ megaraid_mbox_build_cmd(adapter_t *adapter, struct scsi_cmnd *scp, int *busy)
 				rdev->last_disp |= (1L << SCP2CHANNEL(scp));
 			}
 
+			if (scp->cmnd[1] & MEGA_SCSI_INQ_EVPD) {
+				scp->sense_buffer[0] = 0x70;
+				scp->sense_buffer[2] = ILLEGAL_REQUEST;
+				scp->sense_buffer[12] = MEGA_INVALID_FIELD_IN_CDB;
+				scp->result = CHECK_CONDITION << 1;
+				return NULL;
+			}
+
 			/* Fall through */
 
 		case READ_CAPACITY:
-- 
cgit v0.10.2


From 0b4972d59170e13ab0236e8a7148112052590c01 Mon Sep 17 00:00:00 2001
From: "Ju, Seokmann" <Seokmann.Ju@lsil.com>
Date: Tue, 25 Jul 2006 08:45:06 -0600
Subject: [SCSI] megaraid_{mm,mbox}: a fix on "kernel unaligned access address"
 issue

There was an issue in the data structure defined by the megaraid driver
causing "kernel unaligned access.." messages to be displayed during
IOCTL on the IA64 platform.

The issue has been reported/fixed by Sakurai Hiroomi
[sakurai_hiro@soft.fujitsu.com].

Signed-Off By: Seokmann Ju <seokmann.ju@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/Documentation/scsi/ChangeLog.megaraid b/Documentation/scsi/ChangeLog.megaraid
index 0edb048..a056bbe 100644
--- a/Documentation/scsi/ChangeLog.megaraid
+++ b/Documentation/scsi/ChangeLog.megaraid
@@ -66,6 +66,61 @@ Older Version	: 2.20.4.8 (scsi module), 2.20.2.6 (cmm module)
 	Fix: MegaRAID F/W has fixed the problem and being process of release,
 	soon. Meanwhile, driver will filter out the request.
 
+3.	One member of a driver data structure causes an unaligned access
+	issue on 64-bit platforms.
+	Customer reported a "kernel unaligned access address" issue when
+	an application communicates with the MegaRAID HBA driver.
+	Root Cause: in the uioc_t structure, one member was misaligned, which
+	led the system to display the error message.
+	Fix: A patch was submitted to the community by the following person.
+
+	> -----Original Message-----
+	> From: linux-scsi-owner@vger.kernel.org
+	> [mailto:linux-scsi-owner@vger.kernel.org] On Behalf Of Sakurai Hiroomi
+	> Sent: Wednesday, July 12, 2006 4:20 AM
+	> To: linux-scsi@vger.kernel.org; linux-kernel@vger.kernel.org
+	> Subject: Re: Help: strange messages from kernel on IA64 platform
+	>
+	> Hi,
+	>
+	> I saw same message.
+	>
+	> When GAM(Global Array Manager) is started, The following
+	> message output.
+	> kernel: kernel unaligned access to 0xe0000001fe1080d4,
+	> ip=0xa000000200053371
+	>
+	> The uioc structure used by ioctl is defined by packed,
+	> the allignment of each member are disturbed.
+	> In a 64 bit structure, the allignment of member doesn't fit 64 bit
+	> boundary. this causes this messages.
+	> In a 32 bit structure, we don't see the message because the allinment
+	> of member fit 32 bit boundary even if packed is specified.
+	>
+	> patch
+	> I Add 32 bit dummy member to fit 64 bit boundary. I tested.
+	> We confirmed this patch fix the problem by IA64 server.
+	>
+	> **************************************************************
+	> ****************
+	> --- linux-2.6.9/drivers/scsi/megaraid/megaraid_ioctl.h.orig
+	> 2006-04-03 17:13:03.000000000 +0900
+	> +++ linux-2.6.9/drivers/scsi/megaraid/megaraid_ioctl.h
+	> 2006-04-03 17:14:09.000000000 +0900
+	> @@ -132,6 +132,10 @@
+	>  /* Driver Data: */
+	>          void __user *           user_data;
+	>          uint32_t                user_data_len;
+	> +
+	> +        /* 64bit alignment */
+	> +        uint32_t                pad_0xBC;
+	> +
+	>          mraid_passthru_t        __user *user_pthru;
+	>
+	>          mraid_passthru_t        *pthru32;
+	> **************************************************************
+	> ****************
+
 Release Date	: Mon Apr 11 12:27:22 EST 2006 - Seokmann Ju <sju@lsil.com>
 Current Version : 2.20.4.8 (scsi module), 2.20.2.6 (cmm module)
 Older Version	: 2.20.4.7 (scsi module), 2.20.2.6 (cmm module)
diff --git a/drivers/scsi/megaraid/megaraid_ioctl.h b/drivers/scsi/megaraid/megaraid_ioctl.h
index bdaee14..b8aa342 100644
--- a/drivers/scsi/megaraid/megaraid_ioctl.h
+++ b/drivers/scsi/megaraid/megaraid_ioctl.h
@@ -132,6 +132,10 @@ typedef struct uioc {
 /* Driver Data: */
 	void __user *		user_data;
 	uint32_t		user_data_len;
+
+	/* 64bit alignment */
+	uint32_t                pad_for_64bit_align;
+
 	mraid_passthru_t	__user *user_pthru;
 
 	mraid_passthru_t	*pthru32;
diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c
index e8f534f..d85b9a8 100644
--- a/drivers/scsi/megaraid/megaraid_mm.c
+++ b/drivers/scsi/megaraid/megaraid_mm.c
@@ -10,7 +10,7 @@
  *	   2 of the License, or (at your option) any later version.
  *
  * FILE		: megaraid_mm.c
- * Version	: v2.20.2.6 (Mar 7 2005)
+ * Version	: v2.20.2.7 (Jul 16 2006)
  *
  * Common management module
  */
diff --git a/drivers/scsi/megaraid/megaraid_mm.h b/drivers/scsi/megaraid/megaraid_mm.h
index 3d9e67d..c8762b2 100644
--- a/drivers/scsi/megaraid/megaraid_mm.h
+++ b/drivers/scsi/megaraid/megaraid_mm.h
@@ -27,9 +27,9 @@
 #include "megaraid_ioctl.h"
 
 
-#define LSI_COMMON_MOD_VERSION	"2.20.2.6"
+#define LSI_COMMON_MOD_VERSION	"2.20.2.7"
 #define LSI_COMMON_MOD_EXT_VERSION	\
-		"(Release Date: Mon Mar 7 00:01:03 EST 2005)"
+		"(Release Date: Sun Jul 16 00:01:03 EST 2006)"
 
 
 #define LSI_DBGLVL			dbglevel
-- 
cgit v0.10.2


From 0dfda7751e969902f7231fc547ca8c9e0fb9ee13 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 28 Jul 2006 09:01:36 +0200
Subject: [SCSI] fix simscsi

On Thu, Jul 27, 2006 at 01:03:24AM -0700, Andrew Morton wrote:
> arch/ia64/hp/sim/simscsi.c: In function `simscsi_sg_readwrite':
> arch/ia64/hp/sim/simscsi.c:154: error: structure has no member named `buffer'
> arch/ia64/hp/sim/simscsi.c: In function `simscsi_fillresult':
> arch/ia64/hp/sim/simscsi.c:247: error: structure has no member named `buffer'
> arch/ia64/hp/sim/simscsi.c: At top level:
> arch/ia64/hp/sim/simscsi.c:87: warning: 'simscsi_setup' defined but not used

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c
index a3fe975..8f0a16a7 100644
--- a/arch/ia64/hp/sim/simscsi.c
+++ b/arch/ia64/hp/sim/simscsi.c
@@ -151,7 +151,7 @@ static void
 simscsi_sg_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset)
 {
 	int list_len = sc->use_sg;
-	struct scatterlist *sl = (struct scatterlist *)sc->buffer;
+	struct scatterlist *sl = (struct scatterlist *)sc->request_buffer;
 	struct disk_stat stat;
 	struct disk_req req;
 
@@ -244,7 +244,8 @@ static void simscsi_fillresult(struct scsi_cmnd *sc, char *buf, unsigned len)
 
 	if (scatterlen == 0)
 		memcpy(sc->request_buffer, buf, len);
-	else for (slp = (struct scatterlist *)sc->buffer; scatterlen-- > 0 && len > 0; slp++) {
+	else for (slp = (struct scatterlist *)sc->request_buffer;
+		  scatterlen-- > 0 && len > 0; slp++) {
 		unsigned thislen = min(len, slp->length);
 
 		memcpy(page_address(slp->page) + slp->offset, buf, thislen);
-- 
cgit v0.10.2


From 64ba88182962698a8cb75792372c1dddaef82989 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Wed, 2 Aug 2006 15:24:34 -0400
Subject: [SCSI] lpfc 8.1.7 : Add statistics reset callback for FC transport

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 5c68cdd..762d623 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -1210,8 +1210,10 @@ lpfc_get_stats(struct Scsi_Host *shost)
 	struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata;
 	struct lpfc_sli *psli = &phba->sli;
 	struct fc_host_statistics *hs = &phba->link_stats;
+	struct lpfc_lnk_stat * lso = &psli->lnk_stat_offsets;
 	LPFC_MBOXQ_t *pmboxq;
 	MAILBOX_t *pmb;
+	unsigned long seconds;
 	int rc = 0;
 
 	pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
@@ -1272,22 +1274,103 @@ lpfc_get_stats(struct Scsi_Host *shost)
 	hs->invalid_crc_count = pmb->un.varRdLnk.crcCnt;
 	hs->error_frames = pmb->un.varRdLnk.crcCnt;
 
+	hs->link_failure_count -= lso->link_failure_count;
+	hs->loss_of_sync_count -= lso->loss_of_sync_count;
+	hs->loss_of_signal_count -= lso->loss_of_signal_count;
+	hs->prim_seq_protocol_err_count -= lso->prim_seq_protocol_err_count;
+	hs->invalid_tx_word_count -= lso->invalid_tx_word_count;
+	hs->invalid_crc_count -= lso->invalid_crc_count;
+	hs->error_frames -= lso->error_frames;
+
 	if (phba->fc_topology == TOPOLOGY_LOOP) {
 		hs->lip_count = (phba->fc_eventTag >> 1);
+		hs->lip_count -= lso->link_events;
 		hs->nos_count = -1;
 	} else {
 		hs->lip_count = -1;
 		hs->nos_count = (phba->fc_eventTag >> 1);
+		hs->nos_count -= lso->link_events;
 	}
 
 	hs->dumped_frames = -1;
 
-/* FIX ME */
-	/*hs->SecondsSinceLastReset = (jiffies - lpfc_loadtime) / HZ;*/
+	seconds = get_seconds();
+	if (seconds < psli->stats_start)
+		hs->seconds_since_last_reset = seconds +
+				((unsigned long)-1 - psli->stats_start);
+	else
+		hs->seconds_since_last_reset = seconds - psli->stats_start;
 
 	return hs;
 }
 
+static void
+lpfc_reset_stats(struct Scsi_Host *shost)
+{
+	struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata;
+	struct lpfc_sli *psli = &phba->sli;
+	struct lpfc_lnk_stat * lso = &psli->lnk_stat_offsets;
+	LPFC_MBOXQ_t *pmboxq;
+	MAILBOX_t *pmb;
+	int rc = 0;
+
+	pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!pmboxq)
+		return;
+	memset(pmboxq, 0, sizeof(LPFC_MBOXQ_t));
+	/* Reset stats, step 1: READ_STATUS with reset-request word set */
+	pmb = &pmboxq->mb;
+	pmb->mbxCommand = MBX_READ_STATUS;
+	pmb->mbxOwner = OWN_HOST;
+	pmb->un.varWords[0] = 0x1; /* reset request */
+	pmboxq->context1 = NULL;
+
+	if ((phba->fc_flag & FC_OFFLINE_MODE) ||
+		(!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+		rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
+	else
+		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
+
+	if (rc != MBX_SUCCESS) {
+		if (rc == MBX_TIMEOUT)	/* not freed here: cmd may be pending */
+			pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+		else
+			mempool_free(pmboxq, phba->mbox_mem_pool);
+		return;
+	}
+	/* Step 2: read the link counters to use as the new baseline */
+	memset(pmboxq, 0, sizeof(LPFC_MBOXQ_t));
+	pmb->mbxCommand = MBX_READ_LNK_STAT;
+	pmb->mbxOwner = OWN_HOST;
+	pmboxq->context1 = NULL;
+
+	if ((phba->fc_flag & FC_OFFLINE_MODE) ||
+	    (!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+		rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
+	else
+		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
+
+	if (rc != MBX_SUCCESS) {
+		if (rc == MBX_TIMEOUT)	/* not freed here: cmd may be pending */
+			pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+		else
+			mempool_free( pmboxq, phba->mbox_mem_pool);
+		return;
+	}
+	/* lpfc_get_stats() subtracts these offsets from later readings */
+	lso->link_failure_count = pmb->un.varRdLnk.linkFailureCnt;
+	lso->loss_of_sync_count = pmb->un.varRdLnk.lossSyncCnt;
+	lso->loss_of_signal_count = pmb->un.varRdLnk.lossSignalCnt;
+	lso->prim_seq_protocol_err_count = pmb->un.varRdLnk.primSeqErrCnt;
+	lso->invalid_tx_word_count = pmb->un.varRdLnk.invalidXmitWord;
+	lso->invalid_crc_count = pmb->un.varRdLnk.crcCnt;
+	lso->error_frames = pmb->un.varRdLnk.crcCnt;
+	lso->link_events = (phba->fc_eventTag >> 1);
+
+	psli->stats_start = get_seconds();
+	/* Both commands have completed; return the mailbox to the pool */
+	mempool_free(pmboxq, phba->mbox_mem_pool);
+}
 
 /*
  * The LPFC driver treats linkdown handling as target loss events so there
@@ -1431,8 +1514,7 @@ struct fc_function_template lpfc_transport_functions = {
 	 */
 
 	.get_fc_host_stats = lpfc_get_stats,
-
-	/* the LPFC driver doesn't support resetting stats yet */
+	.reset_fc_host_stats = lpfc_reset_stats,
 
 	.dd_fcrport_size = sizeof(struct lpfc_rport_data),
 	.show_rport_maxframe_size = 1,
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 350a625..f453fcc 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1792,6 +1792,9 @@ lpfc_sli_brdrestart(struct lpfc_hba * phba)
 
 	spin_unlock_irq(phba->host->host_lock);
 
+	memset(&psli->lnk_stat_offsets, 0, sizeof(psli->lnk_stat_offsets));
+	psli->stats_start = get_seconds();
+
 	if (skip_post)
 		mdelay(100);
 	else
diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h
index d8ef0d2..478e799 100644
--- a/drivers/scsi/lpfc/lpfc_sli.h
+++ b/drivers/scsi/lpfc/lpfc_sli.h
@@ -172,6 +172,18 @@ struct lpfc_sli_stat {
 	uint32_t mbox_busy;	 /* Mailbox cmd busy */
 };
 
+/* Structure to store link status values when port stats are reset */
+struct lpfc_lnk_stat {
+	uint32_t link_failure_count;
+	uint32_t loss_of_sync_count;
+	uint32_t loss_of_signal_count;
+	uint32_t prim_seq_protocol_err_count;
+	uint32_t invalid_tx_word_count;
+	uint32_t invalid_crc_count;
+	uint32_t error_frames;
+	uint32_t link_events;
+};
+
 /* Structure used to hold SLI information */
 struct lpfc_sli {
 	uint32_t num_rings;
@@ -201,6 +213,8 @@ struct lpfc_sli {
 	struct lpfc_iocbq ** iocbq_lookup; /* array to lookup IOCB by IOTAG */
 	size_t iocbq_lookup_len;           /* current lengs of the array */
 	uint16_t  last_iotag;              /* last allocated IOTAG */
+	unsigned long  stats_start;        /* in seconds */
+	struct lpfc_lnk_stat lnk_stat_offsets;
 };
 
 /* Given a pointer to the start of the ring, and the slot number of
-- 
cgit v0.10.2


From a309a6b6e64b8147b7cfe7388237fb83834f58c9 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Tue, 1 Aug 2006 07:33:43 -0400
Subject: [SCSI] lpfc 8.1.7 : Fix failing firmware download due to mailbox
 delays needing to be longer

Fix failing firmware download due to mailbox delays needing to be longer.

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 762d623..4ccce14 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -1000,7 +1000,8 @@ sysfs_mbox_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
 			spin_unlock_irq(phba->host->host_lock);
 			rc = lpfc_sli_issue_mbox_wait (phba,
 						       phba->sysfs_mbox.mbox,
-						       phba->fc_ratov * 2);
+				lpfc_mbox_tmo_val(phba,
+				    phba->sysfs_mbox.mbox->mb.mbxCommand) * HZ);
 			spin_lock_irq(phba->host->host_lock);
 		}
 
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 517e9e4..2a17646 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -127,6 +127,7 @@ void lpfc_config_port(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_kill_board(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_mbox_put(struct lpfc_hba *, LPFC_MBOXQ_t *);
 LPFC_MBOXQ_t *lpfc_mbox_get(struct lpfc_hba *);
+int lpfc_mbox_tmo_val(struct lpfc_hba *, int);
 
 int lpfc_mem_alloc(struct lpfc_hba *);
 void lpfc_mem_free(struct lpfc_hba *);
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c
index e42f22a..4d016c2 100644
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -651,3 +651,19 @@ lpfc_mbox_get(struct lpfc_hba * phba)
 
 	return mbq;
 }
+
+int
+lpfc_mbox_tmo_val(struct lpfc_hba *phba, int cmd)
+{
+	switch (cmd) {	/* pick the timeout, in seconds, for mailbox cmd */
+	case MBX_WRITE_NV:	/* 0x03 */
+	case MBX_UPDATE_CFG:	/* 0x1B */
+	case MBX_DOWN_LOAD:	/* 0x1C */
+	case MBX_DEL_LD_ENTRY:	/* 0x1D */
+	case MBX_LOAD_AREA:	/* 0x81 */
+	case MBX_FLASH_WR_ULA:  /* 0x98 */
+	case MBX_LOAD_EXP_ROM:	/* 0x9C */
+		return LPFC_MBOX_TMO_FLASH_CMD;	/* flash write/erase cmds */
+	}
+	return LPFC_MBOX_TMO;	/* every other mailbox command */
+}
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index f453fcc..4dc2d58 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -2197,7 +2197,8 @@ lpfc_sli_issue_mbox(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmbox, uint32_t flag)
 			return (MBX_NOT_FINISHED);
 		}
 		/* timeout active mbox command */
-		mod_timer(&psli->mbox_tmo, jiffies + HZ * LPFC_MBOX_TMO);
+		mod_timer(&psli->mbox_tmo, (jiffies +
+			       (HZ * lpfc_mbox_tmo_val(phba, mb->mbxCommand))));
 	}
 
 	/* Mailbox cmd <cmd> issue */
@@ -2257,7 +2258,6 @@ lpfc_sli_issue_mbox(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmbox, uint32_t flag)
 		break;
 
 	case MBX_POLL:
-		i = 0;
 		psli->mbox_active = NULL;
 		if (psli->sli_flag & LPFC_SLI2_ACTIVE) {
 			/* First read mbox status word */
@@ -2271,11 +2271,14 @@ lpfc_sli_issue_mbox(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmbox, uint32_t flag)
 		/* Read the HBA Host Attention Register */
 		ha_copy = readl(phba->HAregaddr);
 
+		i = lpfc_mbox_tmo_val(phba, mb->mbxCommand);
+		i *= 1000; /* Convert to ms */
+
 		/* Wait for command to complete */
 		while (((word0 & OWN_CHIP) == OWN_CHIP) ||
 		       (!(ha_copy & HA_MBATT) &&
 			(phba->hba_state > LPFC_WARM_START))) {
-			if (i++ >= 100) {
+			if (i-- <= 0) {
 				psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
 				spin_unlock_irqrestore(phba->host->host_lock,
 						       drvr_flag);
@@ -2293,7 +2296,7 @@ lpfc_sli_issue_mbox(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmbox, uint32_t flag)
 
 			/* Can be in interrupt context, do not sleep */
 			/* (or might be called with interrupts disabled) */
-			mdelay(i);
+			mdelay(1);
 
 			spin_lock_irqsave(phba->host->host_lock, drvr_flag);
 
diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h
index 478e799..e26de6809 100644
--- a/drivers/scsi/lpfc/lpfc_sli.h
+++ b/drivers/scsi/lpfc/lpfc_sli.h
@@ -225,3 +225,9 @@ struct lpfc_sli {
 
 #define LPFC_MBOX_TMO           30	/* Sec tmo for outstanding mbox
 					   command */
+#define LPFC_MBOX_TMO_FLASH_CMD 300     /* Sec tmo for outstanding FLASH write
+					 * or erase cmds. This is especially
+					 * long because of the potential of
+					 * multiple flash erases that can be
+					 * spawned.
+					 */
-- 
cgit v0.10.2


From 1c067a42413c4f39c907443b8f5979cd4d82f0ff Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Tue, 1 Aug 2006 07:33:52 -0400
Subject: [SCSI] lpfc 8.1.7 : Fix race condition between lpfc_sli_issue_mbox
 and lpfc_online

Fix race condition between lpfc_sli_issue_mbox and lpfc_online

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 4dc2d58..fbf108c 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1711,15 +1711,13 @@ lpfc_sli_brdreset(struct lpfc_hba * phba)
 	phba->fc_myDID = 0;
 	phba->fc_prevDID = 0;
 
-	psli->sli_flag = 0;
-
 	/* Turn off parity checking and serr during the physical reset */
 	pci_read_config_word(phba->pcidev, PCI_COMMAND, &cfg_value);
 	pci_write_config_word(phba->pcidev, PCI_COMMAND,
 			      (cfg_value &
 			       ~(PCI_COMMAND_PARITY | PCI_COMMAND_SERR)));
 
-	psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+	psli->sli_flag &= ~(LPFC_SLI2_ACTIVE | LPFC_PROCESS_LA);
 	/* Now toggle INITFF bit in the Host Control Register */
 	writel(HC_INITFF, phba->HCregaddr);
 	mdelay(1);
@@ -1905,6 +1903,9 @@ lpfc_sli_hba_setup(struct lpfc_hba * phba)
 	}
 
 	while (resetcount < 2 && !done) {
+		spin_lock_irq(phba->host->host_lock);
+		phba->sli.sli_flag |= LPFC_SLI_MBOX_ACTIVE;
+		spin_unlock_irq(phba->host->host_lock);
 		phba->hba_state = LPFC_STATE_UNKNOWN;
 		lpfc_sli_brdrestart(phba);
 		msleep(2500);
@@ -1912,6 +1913,9 @@ lpfc_sli_hba_setup(struct lpfc_hba * phba)
 		if (rc)
 			break;
 
+		spin_lock_irq(phba->host->host_lock);
+		phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+		spin_unlock_irq(phba->host->host_lock);
 		resetcount++;
 
 	/* Call pre CONFIG_PORT mailbox command initialization.  A value of 0
-- 
cgit v0.10.2


From 8f6d98d2e05c38cb08fbda5e38e5f87d361c3cac Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Tue, 1 Aug 2006 07:34:00 -0400
Subject: [SCSI] lpfc 8.1.7 : Short bug fixes

Short bug fixes:
 - Fix iocbq list corruption due to missing list_del's in ct handling
 - Missing unlock in lpfc_sli_next_iotag()
 - Fix initialization of can_queue value
 - Differentiate sysfs mailbox errors with different codes.

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 4ccce14..76f8bd5 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -884,7 +884,7 @@ sysfs_mbox_write(struct kobject *kobj, char *buf, loff_t off, size_t count)
 		    phba->sysfs_mbox.mbox   == NULL ) {
 			sysfs_mbox_idle(phba);
 			spin_unlock_irq(host->host_lock);
-			return -EINVAL;
+			return -EAGAIN;
 		}
 	}
 
@@ -1008,7 +1008,7 @@ sysfs_mbox_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
 		if (rc != MBX_SUCCESS) {
 			sysfs_mbox_idle(phba);
 			spin_unlock_irq(host->host_lock);
-			return -ENODEV;
+			return  (rc == MBX_TIMEOUT) ? -ETIME : -ENODEV;
 		}
 		phba->sysfs_mbox.state = SMBOX_READING;
 	}
@@ -1017,7 +1017,7 @@ sysfs_mbox_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
 		printk(KERN_WARNING  "mbox_read: Bad State\n");
 		sysfs_mbox_idle(phba);
 		spin_unlock_irq(host->host_lock);
-		return -EINVAL;
+		return -EAGAIN;
 	}
 
 	memcpy(buf, (uint8_t *) & phba->sysfs_mbox.mbox->mb + off, count);
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index b65ee57..cab2d9d 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -131,6 +131,7 @@ lpfc_ct_unsol_event(struct lpfc_hba * phba,
 	}
 
 ct_unsol_event_exit_piocbq:
+	list_del(&head);
 	if (pmbuf) {
 		list_for_each_entry_safe(matp, next_matp, &pmbuf->list, list) {
 			lpfc_mbuf_free(phba, matp->virt, matp->phys);
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index ef47b82..16dc8c8 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1616,7 +1616,11 @@ lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
 		goto out_free_iocbq;
 	}
 
-	/* We can rely on a queue depth attribute only after SLI HBA setup */
+	/*
+	 * Set initial can_queue value since 0 is no longer supported and
+	 * scsi_add_host will fail. This will be adjusted later based on the
+	 * max xri value determined in hba setup.
+	 */
 	host->can_queue = phba->cfg_hba_queue_depth - 10;
 
 	/* Tell the midlayer we support 16 byte commands */
@@ -1656,6 +1660,12 @@ lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
 		goto out_free_irq;
 	}
 
+	/*
+	 * hba setup may have changed the hba_queue_depth so we need to adjust
+	 * the value of can_queue.
+	 */
+	host->can_queue = phba->cfg_hba_queue_depth - 10;
+
 	lpfc_discovery_wait(phba);
 
 	if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index fbf108c..9802ee8 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -320,7 +320,8 @@ lpfc_sli_next_iotag(struct lpfc_hba * phba, struct lpfc_iocbq * iocbq)
 			kfree(old_arr);
 			return iotag;
 		}
-	}
+	} else
+		spin_unlock_irq(phba->host->host_lock);
 
 	lpfc_printf_log(phba, KERN_ERR,LOG_SLI,
 			"%d:0318 Failed to allocate IOTAG.last IOTAG is %d\n",
@@ -1399,11 +1400,11 @@ lpfc_sli_handle_slow_ring_event(struct lpfc_hba * phba,
 								 next_iocb,
 								 &saveq->list,
 								 list) {
+						list_del(&rspiocbp->list);
 						lpfc_sli_release_iocbq(phba,
 								     rspiocbp);
 					}
 				}
-
 				lpfc_sli_release_iocbq(phba, saveq);
 			}
 		}
-- 
cgit v0.10.2


From dca9479be891eac33e53ab37769e18c3259646da Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Tue, 1 Aug 2006 07:34:08 -0400
Subject: [SCSI] lpfc 8.1.7 : ID String and Message fixes

ID String and Message fixes
 - Fix switch symbolic name registration to match cross-OS values
 - Replace printk's with more standard lpfc_printf_log calls
 - Make all lpfc_printf_log message numbers unique

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index cab2d9d..bbb7310 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -482,7 +482,7 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
 		if (CTrsp->CommandResponse.bits.CmdRsp ==
 		    be16_to_cpu(SLI_CT_RESPONSE_FS_ACC)) {
 			lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
-					"%d:0239 NameServer Rsp "
+					"%d:0208 NameServer Rsp "
 					"Data: x%x\n",
 					phba->brd_no,
 					phba->fc_flag);
@@ -589,13 +589,9 @@ lpfc_get_hba_sym_node_name(struct lpfc_hba * phba, uint8_t * symbp)
 
 	lpfc_decode_firmware_rev(phba, fwrev, 0);
 
-	if (phba->Port[0]) {
-		sprintf(symbp, "Emulex %s Port %s FV%s DV%s", phba->ModelName,
-			phba->Port, fwrev, lpfc_release_version);
-	} else {
-		sprintf(symbp, "Emulex %s FV%s DV%s", phba->ModelName,
-			fwrev, lpfc_release_version);
-	}
+	sprintf(symbp, "Emulex %s FV%s DV%s", phba->ModelName,
+		fwrev, lpfc_release_version);
+	return;
 }
 
 /*
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index b89f6cb..60f5cca 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -2839,7 +2839,7 @@ lpfc_els_rsp_rps_acc(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 
 	/* Xmit ELS RPS ACC response tag <ulpIoTag> */
 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
-			"%d:0128 Xmit ELS RPS ACC response tag x%x "
+			"%d:0118 Xmit ELS RPS ACC response tag x%x "
 			"Data: x%x x%x x%x x%x x%x\n",
 			phba->brd_no,
 			elsiocb->iocb.ulpIoTag,
@@ -2948,7 +2948,7 @@ lpfc_els_rsp_rpl_acc(struct lpfc_hba * phba, uint16_t cmdsize,
 
 	/* Xmit ELS RPL ACC response tag <ulpIoTag> */
 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
-			"%d:0128 Xmit ELS RPL ACC response tag x%x "
+			"%d:0120 Xmit ELS RPL ACC response tag x%x "
 			"Data: x%x x%x x%x x%x x%x\n",
 			phba->brd_no,
 			elsiocb->iocb.ulpIoTag,
@@ -3109,7 +3109,7 @@ lpfc_els_rcv_fan(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
 	struct lpfc_nodelist *ndlp, *next_ndlp;
 
 	/* FAN received */
-	lpfc_printf_log(phba, KERN_INFO, LOG_ELS, "%d:265 FAN received\n",
+	lpfc_printf_log(phba, KERN_INFO, LOG_ELS, "%d:0265 FAN received\n",
 								phba->brd_no);
 
 	icmd = &cmdiocb->iocb;
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 4d6cf99..1c3f268 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -1782,7 +1782,7 @@ lpfc_findnode_did(struct lpfc_hba * phba, uint32_t order, uint32_t did)
 				/* LOG change to REGLOGIN */
 				/* FIND node DID reglogin */
 				lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
-						"%d:0931 FIND node DID reglogin"
+						"%d:0901 FIND node DID reglogin"
 						" Data: x%p x%x x%x x%x\n",
 						phba->brd_no,
 						ndlp, ndlp->nlp_DID,
@@ -1805,7 +1805,7 @@ lpfc_findnode_did(struct lpfc_hba * phba, uint32_t order, uint32_t did)
 				/* LOG change to PRLI */
 				/* FIND node DID prli */
 				lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
-						"%d:0931 FIND node DID prli "
+						"%d:0902 FIND node DID prli "
 						"Data: x%p x%x x%x x%x\n",
 						phba->brd_no,
 						ndlp, ndlp->nlp_DID,
@@ -1828,7 +1828,7 @@ lpfc_findnode_did(struct lpfc_hba * phba, uint32_t order, uint32_t did)
 				/* LOG change to NPR */
 				/* FIND node DID npr */
 				lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
-						"%d:0931 FIND node DID npr "
+						"%d:0903 FIND node DID npr "
 						"Data: x%p x%x x%x x%x\n",
 						phba->brd_no,
 						ndlp, ndlp->nlp_DID,
@@ -1851,7 +1851,7 @@ lpfc_findnode_did(struct lpfc_hba * phba, uint32_t order, uint32_t did)
 				/* LOG change to UNUSED */
 				/* FIND node DID unused */
 				lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
-						"%d:0931 FIND node DID unused "
+						"%d:0905 FIND node DID unused "
 						"Data: x%p x%x x%x x%x\n",
 						phba->brd_no,
 						ndlp, ndlp->nlp_DID,
@@ -2335,7 +2335,7 @@ lpfc_disc_timeout_handler(struct lpfc_hba *phba)
 		initlinkmbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
 		if (!initlinkmbox) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
-					"%d:0226 Device Discovery "
+					"%d:0206 Device Discovery "
 					"completion error\n",
 					phba->brd_no);
 			phba->hba_state = LPFC_HBA_ERROR;
@@ -2365,7 +2365,7 @@ lpfc_disc_timeout_handler(struct lpfc_hba *phba)
 		if (!clearlambox) {
 			clrlaerr = 1;
 			lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
-					"%d:0226 Device Discovery "
+					"%d:0207 Device Discovery "
 					"completion error\n",
 					phba->brd_no);
 			phba->hba_state = LPFC_HBA_ERROR;
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index bd0b0e2..b38021a 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -179,7 +179,7 @@ lpfc_els_abort(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
 
 	/* Abort outstanding I/O on NPort <nlp_DID> */
 	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
-			"%d:0201 Abort outstanding I/O on NPort x%x "
+			"%d:0205 Abort outstanding I/O on NPort x%x "
 			"Data: x%x x%x x%x\n",
 			phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag,
 			ndlp->nlp_state, ndlp->nlp_rpi);
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index a760a44..0811c82 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -1104,7 +1104,7 @@ lpfc_reset_bus_handler(struct scsi_cmnd *cmnd)
 					  ndlp->rport->dd_data);
 		if (ret != SUCCESS) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
-				"%d:0713 Bus Reset on target %d failed\n",
+				"%d:0700 Bus Reset on target %d failed\n",
 				phba->brd_no, i);
 			err_count++;
 		}
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 9802ee8..70f4d5a 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -970,9 +970,11 @@ void lpfc_sli_poll_fcp_ring(struct lpfc_hba * phba)
 			 * resources need to be recovered.
 			 */
 			if (unlikely(irsp->ulpCommand == CMD_XRI_ABORTED_CX)) {
-				printk(KERN_INFO "%s: IOCB cmd 0x%x processed."
-				       " Skipping completion\n", __FUNCTION__,
-				       irsp->ulpCommand);
+				lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+						"%d:0314 IOCB cmd 0x%x"
+						" processed. Skipping"
+						" completion\n", phba->brd_no,
+						irsp->ulpCommand);
 				break;
 			}
 
@@ -1105,7 +1107,7 @@ lpfc_sli_handle_fast_ring_event(struct lpfc_hba * phba,
 		if (unlikely(irsp->ulpStatus)) {
 			/* Rsp ring <ringno> error: IOCB */
 			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
-				"%d:0326 Rsp Ring %d error: IOCB Data: "
+				"%d:0336 Rsp Ring %d error: IOCB Data: "
 				"x%x x%x x%x x%x x%x x%x x%x x%x\n",
 				phba->brd_no, pring->ringno,
 				irsp->un.ulpWord[0], irsp->un.ulpWord[1],
@@ -1123,9 +1125,11 @@ lpfc_sli_handle_fast_ring_event(struct lpfc_hba * phba,
 			 * resources need to be recovered.
 			 */
 			if (unlikely(irsp->ulpCommand == CMD_XRI_ABORTED_CX)) {
-				printk(KERN_INFO "%s: IOCB cmd 0x%x processed. "
-				       "Skipping completion\n", __FUNCTION__,
-				       irsp->ulpCommand);
+				lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+						"%d:0333 IOCB cmd 0x%x"
+						" processed. Skipping"
+						" completion\n", phba->brd_no,
+						irsp->ulpCommand);
 				break;
 			}
 
@@ -1156,7 +1160,7 @@ lpfc_sli_handle_fast_ring_event(struct lpfc_hba * phba,
 			} else {
 				/* Unknown IOCB command */
 				lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-					"%d:0321 Unknown IOCB command "
+					"%d:0334 Unknown IOCB command "
 					"Data: x%x, x%x x%x x%x x%x\n",
 					phba->brd_no, type, irsp->ulpCommand,
 					irsp->ulpStatus, irsp->ulpIoTag,
@@ -1239,7 +1243,7 @@ lpfc_sli_handle_slow_ring_event(struct lpfc_hba * phba,
 		lpfc_printf_log(phba,
 				KERN_ERR,
 				LOG_SLI,
-				"%d:0312 Ring %d handler: portRspPut %d "
+				"%d:0303 Ring %d handler: portRspPut %d "
 				"is bigger then rsp ring %d\n",
 				phba->brd_no,
 				pring->ringno, portRspPut, portRspMax);
@@ -1384,7 +1388,7 @@ lpfc_sli_handle_slow_ring_event(struct lpfc_hba * phba,
 					lpfc_printf_log(phba,
 						KERN_ERR,
 						LOG_SLI,
-						"%d:0321 Unknown IOCB command "
+						"%d:0335 Unknown IOCB command "
 						"Data: x%x x%x x%x x%x\n",
 						phba->brd_no,
 						irsp->ulpCommand,
@@ -1759,7 +1763,7 @@ lpfc_sli_brdrestart(struct lpfc_hba * phba)
 
 	/* Restart HBA */
 	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
-			"%d:0328 Restart HBA Data: x%x x%x\n", phba->brd_no,
+			"%d:0337 Restart HBA Data: x%x x%x\n", phba->brd_no,
 			phba->hba_state, psli->sli_flag);
 
 	word0 = 0;
@@ -3016,7 +3020,7 @@ lpfc_sli_issue_iocb_wait(struct lpfc_hba * phba,
 
 		if (timeleft == 0) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-					"%d:0329 IOCB wait timeout error - no "
+					"%d:0338 IOCB wait timeout error - no "
 					"wake response Data x%x\n",
 					phba->brd_no, timeout);
 			retval = IOCB_TIMEDOUT;
-- 
cgit v0.10.2


From 416780d3a7836a99568c6728830ced592a7a22e3 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Tue, 1 Aug 2006 07:34:16 -0400
Subject: [SCSI] lpfc 8.1.7 : Change version number to 8.1.8

Change version number to 8.1.8

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 10e89c6..c7963f9 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -18,7 +18,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "8.1.7"
+#define LPFC_DRIVER_VERSION "8.1.8"
 
 #define LPFC_DRIVER_NAME "lpfc"
 
-- 
cgit v0.10.2


From b797b6de9d4d8d085169a507c54b18e74dbe4f21 Mon Sep 17 00:00:00 2001
From: Shyam Sundar <shyam.sundar@qlogic.com>
Date: Tue, 1 Aug 2006 13:48:13 -0700
Subject: [SCSI] qla2xxx: Correct endianness problem while issuing a Marker IOCB
 on ISP24xx.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 2b60a27..c5b3c61 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -471,6 +471,7 @@ __qla2x00_marker(scsi_qla_host_t *ha, uint16_t loop_id, uint16_t lun,
 			mrk24->nport_handle = cpu_to_le16(loop_id);
 			mrk24->lun[1] = LSB(lun);
 			mrk24->lun[2] = MSB(lun);
+			host_to_fcp_swap(mrk24->lun, sizeof(mrk24->lun));
 		} else {
 			SET_TARGET_ID(ha, mrk->target, loop_id);
 			mrk->lun = cpu_to_le16(lun);
-- 
cgit v0.10.2


From 45ebeb560570fd7ebae29a4105b81874063d5425 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Tue, 1 Aug 2006 13:48:14 -0700
Subject: [SCSI] qla2xxx: Log Trace/Diagnostic asynchronous events.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 139ea0e..0930260 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -487,6 +487,7 @@ typedef struct {
 #define MBA_IP_RCV_BUFFER_EMPTY 0x8026	/* IP receive buffer queue empty. */
 #define MBA_IP_HDR_DATA_SPLIT	0x8027	/* IP header/data splitting feature */
 					/* used. */
+#define MBA_TRACE_NOTIFICATION	0x8028	/* Trace/Diagnostic notification. */
 #define MBA_POINT_TO_POINT	0x8030	/* Point to point mode. */
 #define MBA_CMPLT_1_16BIT	0x8031	/* Completion 1 16bit IOSB. */
 #define MBA_CMPLT_2_16BIT	0x8032	/* Completion 2 16bit IOSB. */
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 795bf15..de06131 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -587,6 +587,11 @@ qla2x00_async_event(scsi_qla_host_t *ha, uint16_t *mb)
 		DEBUG2(printk("scsi(%ld): Discard RND Frame -- %04x %04x "
 		    "%04x.\n", ha->host_no, mb[1], mb[2], mb[3]));
 		break;
+
+	case MBA_TRACE_NOTIFICATION:
+		DEBUG2(printk("scsi(%ld): Trace Notification -- %04x %04x.\n",
+		ha->host_no, mb[1], mb[2]));
+		break;
 	}
 }
 
-- 
cgit v0.10.2


From b0328beed0786b07d0102ed59cbf0b54ad8f726b Mon Sep 17 00:00:00 2001
From: Vladislav Bolkhovitin <vst@vlnb.net>
Date: Tue, 1 Aug 2006 13:48:15 -0700
Subject: [SCSI] qla2xxx: Fix to allow to reset devices using sg interface
 (sg_reset).

Currently it is impossible to reset SCSI devices provided by the QLogic
QLA2xxx driver externally through their corresponding sg devices, in
particular via the sg_reset utility, because the qla2xxx driver's
qla2xxx_eh_device_reset() function checks whether the input scsi_cmnd
has its private data (CMD_SP()) attached. The pointer found is then not
used anywhere inside qla2xxx_eh_device_reset(). If the RESET request
comes from an sg device, it does not have such private data.

The attached patch removes the check for a non-NULL CMD_SP() from
qla2xxx_eh_device_reset(), and hence allows QLA2xxx devices to be reset
using their corresponding sg devices.

AV: change applies to bus/host reset handlers as well.

Signed-off-by: Vladislav Bolkhovitin <vst@vlnb.net>
Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index ec7ebb6..1e2b95b 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -744,7 +744,6 @@ qla2xxx_eh_device_reset(struct scsi_cmnd *cmd)
 {
 	scsi_qla_host_t *ha = to_qla_host(cmd->device->host);
 	fc_port_t *fcport = (struct fc_port *) cmd->device->hostdata;
-	srb_t *sp;
 	int ret;
 	unsigned int id, lun;
 	unsigned long serial;
@@ -755,8 +754,7 @@ qla2xxx_eh_device_reset(struct scsi_cmnd *cmd)
 	lun = cmd->device->lun;
 	serial = cmd->serial_number;
 
-	sp = (srb_t *) CMD_SP(cmd);
-	if (!sp || !fcport)
+	if (!fcport)
 		return ret;
 
 	qla_printk(KERN_INFO, ha,
@@ -875,7 +873,6 @@ qla2xxx_eh_bus_reset(struct scsi_cmnd *cmd)
 {
 	scsi_qla_host_t *ha = to_qla_host(cmd->device->host);
 	fc_port_t *fcport = (struct fc_port *) cmd->device->hostdata;
-	srb_t *sp;
 	int ret;
 	unsigned int id, lun;
 	unsigned long serial;
@@ -886,8 +883,7 @@ qla2xxx_eh_bus_reset(struct scsi_cmnd *cmd)
 	lun = cmd->device->lun;
 	serial = cmd->serial_number;
 
-	sp = (srb_t *) CMD_SP(cmd);
-	if (!sp || !fcport)
+	if (!fcport)
 		return ret;
 
 	qla_printk(KERN_INFO, ha,
@@ -936,7 +932,6 @@ qla2xxx_eh_host_reset(struct scsi_cmnd *cmd)
 {
 	scsi_qla_host_t *ha = to_qla_host(cmd->device->host);
 	fc_port_t *fcport = (struct fc_port *) cmd->device->hostdata;
-	srb_t *sp;
 	int ret;
 	unsigned int id, lun;
 	unsigned long serial;
@@ -947,8 +942,7 @@ qla2xxx_eh_host_reset(struct scsi_cmnd *cmd)
 	lun = cmd->device->lun;
 	serial = cmd->serial_number;
 
-	sp = (srb_t *) CMD_SP(cmd);
-	if (!sp || !fcport)
+	if (!fcport)
 		return ret;
 
 	qla_printk(KERN_INFO, ha,
-- 
cgit v0.10.2


From c2602c48b5ebde55b418ba252737bf60caa4bab0 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Tue, 1 Aug 2006 13:48:16 -0700
Subject: [SCSI] qla2xxx: Update version number to 8.01.05-k4.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index d2d6834..f5826bf 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -7,7 +7,7 @@
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "8.01.05-k3"
+#define QLA2XXX_VERSION      "8.01.05-k4"
 
 #define QLA_DRIVER_MAJOR_VER	8
 #define QLA_DRIVER_MINOR_VER	1
-- 
cgit v0.10.2


From fea9d6c7bcd8ff1d60ff74f27ba483b3820b18a3 Mon Sep 17 00:00:00 2001
From: Volker Sameske <sameske@de.ibm.com>
Date: Wed, 2 Aug 2006 11:05:16 +0200
Subject: [SCSI] zfcp: improve management of request IDs

Improve request handling. Use hash table to manage request IDs.

Signed-off-by: Volker Sameske <sameske@de.ibm.com>
Signed-off-by: Andreas Herrmann <aherrman@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index 9cd789b..adc9d8f 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -112,6 +112,105 @@ _zfcp_hex_dump(char *addr, int count)
 		printk("\n");
 }
 
+
+/****************************************************************/
+/****** Functions to handle the request ID hash table    ********/
+/****************************************************************/
+
+#define ZFCP_LOG_AREA			ZFCP_LOG_AREA_FSF
+
+static int zfcp_reqlist_init(struct zfcp_adapter *adapter)
+{
+	int i;
+
+	adapter->req_list = kcalloc(REQUEST_LIST_SIZE, sizeof(struct list_head),
+				    GFP_KERNEL);
+
+	if (!adapter->req_list)
+		return -ENOMEM;
+
+	for (i=0; i<REQUEST_LIST_SIZE; i++)
+		INIT_LIST_HEAD(&adapter->req_list[i]);
+
+	return 0;
+}
+
+static void zfcp_reqlist_free(struct zfcp_adapter *adapter)
+{
+	struct zfcp_fsf_req *request, *tmp;
+	unsigned int i;
+
+	for (i=0; i<REQUEST_LIST_SIZE; i++) {
+		if (list_empty(&adapter->req_list[i]))
+			continue;
+
+		list_for_each_entry_safe(request, tmp,
+					 &adapter->req_list[i], list)
+			list_del(&request->list);
+	}
+
+	kfree(adapter->req_list);
+}
+
+void zfcp_reqlist_add(struct zfcp_adapter *adapter,
+		      struct zfcp_fsf_req *fsf_req)
+{
+	unsigned int i;
+
+	i = fsf_req->req_id % REQUEST_LIST_SIZE;
+	list_add_tail(&fsf_req->list, &adapter->req_list[i]);
+}
+
+void zfcp_reqlist_remove(struct zfcp_adapter *adapter, unsigned long req_id)
+{
+	struct zfcp_fsf_req *request, *tmp;
+	unsigned int i, counter;
+	u64 dbg_tmp[2];
+
+	i = req_id % REQUEST_LIST_SIZE;
+	BUG_ON(list_empty(&adapter->req_list[i]));
+
+	counter = 0;
+	list_for_each_entry_safe(request, tmp, &adapter->req_list[i], list) {
+		if (request->req_id == req_id) {
+			dbg_tmp[0] = (u64) atomic_read(&adapter->reqs_active);
+			dbg_tmp[1] = (u64) counter;
+			debug_event(adapter->erp_dbf, 4, (void *) dbg_tmp, 16);
+			list_del(&request->list);
+			break;
+		}
+		counter++;
+	}
+}
+
+struct zfcp_fsf_req *zfcp_reqlist_ismember(struct zfcp_adapter *adapter,
+					   unsigned long req_id)
+{
+	struct zfcp_fsf_req *request, *tmp;
+	unsigned int i;
+
+	i = req_id % REQUEST_LIST_SIZE;
+
+	list_for_each_entry_safe(request, tmp, &adapter->req_list[i], list)
+		if (request->req_id == req_id)
+			return request;
+
+	return NULL;
+}
+
+int zfcp_reqlist_isempty(struct zfcp_adapter *adapter)
+{
+	unsigned int i;
+
+	for (i=0; i<REQUEST_LIST_SIZE; i++)
+		if (!list_empty(&adapter->req_list[i]))
+			return 0;
+
+	return 1;
+}
+
+#undef ZFCP_LOG_AREA
+
 /****************************************************************/
 /************** Uncategorised Functions *************************/
 /****************************************************************/
@@ -961,8 +1060,12 @@ zfcp_adapter_enqueue(struct ccw_device *ccw_device)
 	INIT_LIST_HEAD(&adapter->port_remove_lh);
 
 	/* initialize list of fsf requests */
-	spin_lock_init(&adapter->fsf_req_list_lock);
-	INIT_LIST_HEAD(&adapter->fsf_req_list_head);
+	spin_lock_init(&adapter->req_list_lock);
+	retval = zfcp_reqlist_init(adapter);
+	if (retval) {
+		ZFCP_LOG_INFO("request list initialization failed\n");
+		goto failed_low_mem_buffers;
+	}
 
 	/* initialize debug locks */
 
@@ -1041,8 +1144,6 @@ zfcp_adapter_enqueue(struct ccw_device *ccw_device)
  *		!0 - struct zfcp_adapter  data structure could not be removed
  *			(e.g. still used)
  * locks:	adapter list write lock is assumed to be held by caller
- *              adapter->fsf_req_list_lock is taken and released within this 
- *              function and must not be held on entry
  */
 void
 zfcp_adapter_dequeue(struct zfcp_adapter *adapter)
@@ -1054,14 +1155,14 @@ zfcp_adapter_dequeue(struct zfcp_adapter *adapter)
 	zfcp_sysfs_adapter_remove_files(&adapter->ccw_device->dev);
 	dev_set_drvdata(&adapter->ccw_device->dev, NULL);
 	/* sanity check: no pending FSF requests */
-	spin_lock_irqsave(&adapter->fsf_req_list_lock, flags);
-	retval = !list_empty(&adapter->fsf_req_list_head);
-	spin_unlock_irqrestore(&adapter->fsf_req_list_lock, flags);
-	if (retval) {
+	spin_lock_irqsave(&adapter->req_list_lock, flags);
+	retval = zfcp_reqlist_isempty(adapter);
+	spin_unlock_irqrestore(&adapter->req_list_lock, flags);
+	if (!retval) {
 		ZFCP_LOG_NORMAL("bug: adapter %s (%p) still in use, "
 				"%i requests outstanding\n",
 				zfcp_get_busid_by_adapter(adapter), adapter,
-				atomic_read(&adapter->fsf_reqs_active));
+				atomic_read(&adapter->reqs_active));
 		retval = -EBUSY;
 		goto out;
 	}
@@ -1087,6 +1188,7 @@ zfcp_adapter_dequeue(struct zfcp_adapter *adapter)
 	zfcp_free_low_mem_buffers(adapter);
 	/* free memory of adapter data structure and queues */
 	zfcp_qdio_free_queues(adapter);
+	zfcp_reqlist_free(adapter);
 	kfree(adapter->fc_stats);
 	kfree(adapter->stats_reset_data);
 	ZFCP_LOG_TRACE("freeing adapter structure\n");
diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c
index 57d8e4b..fdabade 100644
--- a/drivers/s390/scsi/zfcp_ccw.c
+++ b/drivers/s390/scsi/zfcp_ccw.c
@@ -164,6 +164,11 @@ zfcp_ccw_set_online(struct ccw_device *ccw_device)
 	retval = zfcp_adapter_scsi_register(adapter);
 	if (retval)
 		goto out_scsi_register;
+
+	/* initialize request counter */
+	BUG_ON(!zfcp_reqlist_isempty(adapter));
+	adapter->req_no = 0;
+
 	zfcp_erp_modify_adapter_status(adapter, ZFCP_STATUS_COMMON_RUNNING,
 				       ZFCP_SET);
 	zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED);
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 2df512a..72293f3 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -886,11 +886,11 @@ struct zfcp_adapter {
 	struct list_head        port_remove_lh;    /* head of ports to be
 						      removed */
 	u32			ports;	           /* number of remote ports */
-        struct timer_list       scsi_er_timer;     /* SCSI err recovery watch */
-	struct list_head	fsf_req_list_head; /* head of FSF req list */
-	spinlock_t		fsf_req_list_lock; /* lock for ops on list of
-						      FSF requests */
-        atomic_t       		fsf_reqs_active;   /* # active FSF reqs */
+	struct timer_list	scsi_er_timer;     /* SCSI err recovery watch */
+	atomic_t		reqs_active;	   /* # active FSF reqs */
+	unsigned long		req_no;		   /* unique FSF req number */
+	struct list_head	*req_list;	   /* list of pending reqs */
+	spinlock_t		req_list_lock;	   /* request list lock */
 	struct zfcp_qdio_queue	request_queue;	   /* request queue */
 	u32			fsf_req_seq_no;	   /* FSF cmnd seq number */
 	wait_queue_head_t	request_wq;	   /* can be used to wait for
@@ -986,6 +986,7 @@ struct zfcp_unit {
 /* FSF request */
 struct zfcp_fsf_req {
 	struct list_head       list;	       /* list of FSF requests */
+	unsigned long	       req_id;	       /* unique request ID */
 	struct zfcp_adapter    *adapter;       /* adapter request belongs to */
 	u8		       sbal_number;    /* nr of SBALs free for use */
 	u8		       sbal_first;     /* first SBAL for this request */
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index 8ec8da0..f74412b 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -848,18 +848,16 @@ zfcp_erp_strategy_check_fsfreq(struct zfcp_erp_action *erp_action)
 	struct zfcp_adapter *adapter = erp_action->adapter;
 
 	if (erp_action->fsf_req) {
-		/* take lock to ensure that request is not being deleted meanwhile */
-		spin_lock(&adapter->fsf_req_list_lock);
-		/* check whether fsf req does still exist */
-		list_for_each_entry(fsf_req, &adapter->fsf_req_list_head, list)
-		    if (fsf_req == erp_action->fsf_req)
-			break;
-		if (fsf_req && (fsf_req->erp_action == erp_action)) {
+		/* take lock to ensure that request is not deleted meanwhile */
+		spin_lock(&adapter->req_list_lock);
+		if ((!zfcp_reqlist_ismember(adapter,
+					    erp_action->fsf_req->req_id)) &&
+		    (fsf_req->erp_action == erp_action)) {
 			/* fsf_req still exists */
 			debug_text_event(adapter->erp_dbf, 3, "a_ca_req");
 			debug_event(adapter->erp_dbf, 3, &fsf_req,
 				    sizeof (unsigned long));
-			/* dismiss fsf_req of timed out or dismissed erp_action */
+			/* dismiss fsf_req of timed out/dismissed erp_action */
 			if (erp_action->status & (ZFCP_STATUS_ERP_DISMISSED |
 						  ZFCP_STATUS_ERP_TIMEDOUT)) {
 				debug_text_event(adapter->erp_dbf, 3,
@@ -892,7 +890,7 @@ zfcp_erp_strategy_check_fsfreq(struct zfcp_erp_action *erp_action)
 			 */
 			erp_action->fsf_req = NULL;
 		}
-		spin_unlock(&adapter->fsf_req_list_lock);
+		spin_unlock(&adapter->req_list_lock);
 	} else
 		debug_text_event(adapter->erp_dbf, 3, "a_ca_noreq");
 
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index d023660..04bb3a9 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -63,7 +63,6 @@ extern int  zfcp_qdio_allocate_queues(struct zfcp_adapter *);
 extern void zfcp_qdio_free_queues(struct zfcp_adapter *);
 extern int  zfcp_qdio_determine_pci(struct zfcp_qdio_queue *,
 				    struct zfcp_fsf_req *);
-extern int  zfcp_qdio_reqid_check(struct zfcp_adapter *, void *);
 
 extern volatile struct qdio_buffer_element *zfcp_qdio_sbale_req
 	(struct zfcp_fsf_req *, int, int);
@@ -190,5 +189,10 @@ extern void zfcp_scsi_dbf_event_abort(const char *, struct zfcp_adapter *,
 				      struct zfcp_fsf_req *);
 extern void zfcp_scsi_dbf_event_devreset(const char *, u8, struct zfcp_unit *,
 					 struct scsi_cmnd *);
+extern void zfcp_reqlist_add(struct zfcp_adapter *, struct zfcp_fsf_req *);
+extern void zfcp_reqlist_remove(struct zfcp_adapter *, unsigned long);
+extern struct zfcp_fsf_req *zfcp_reqlist_ismember(struct zfcp_adapter *,
+						  unsigned long);
+extern int zfcp_reqlist_isempty(struct zfcp_adapter *);
 
 #endif	/* ZFCP_EXT_H */
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 31db2b0..ff2eacf 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -49,7 +49,6 @@ static int zfcp_fsf_fsfstatus_qual_eval(struct zfcp_fsf_req *);
 static void zfcp_fsf_link_down_info_eval(struct zfcp_adapter *,
 	struct fsf_link_down_info *);
 static int zfcp_fsf_req_dispatch(struct zfcp_fsf_req *);
-static void zfcp_fsf_req_dismiss(struct zfcp_fsf_req *);
 
 /* association between FSF command and FSF QTCB type */
 static u32 fsf_qtcb_type[] = {
@@ -146,47 +145,48 @@ zfcp_fsf_req_free(struct zfcp_fsf_req *fsf_req)
 		kfree(fsf_req);
 }
 
-/*
- * function:	
- *
- * purpose:	
- *
- * returns:
- *
- * note: qdio queues shall be down (no ongoing inbound processing)
+/**
+ * zfcp_fsf_req_dismiss - dismiss a single fsf request
  */
-int
-zfcp_fsf_req_dismiss_all(struct zfcp_adapter *adapter)
+static void zfcp_fsf_req_dismiss(struct zfcp_adapter *adapter,
+				 struct zfcp_fsf_req *fsf_req,
+				 unsigned int counter)
 {
-	struct zfcp_fsf_req *fsf_req, *tmp;
-	unsigned long flags;
-	LIST_HEAD(remove_queue);
+	u64 dbg_tmp[2];
 
-	spin_lock_irqsave(&adapter->fsf_req_list_lock, flags);
-	list_splice_init(&adapter->fsf_req_list_head, &remove_queue);
-	atomic_set(&adapter->fsf_reqs_active, 0);
-	spin_unlock_irqrestore(&adapter->fsf_req_list_lock, flags);
-
-	list_for_each_entry_safe(fsf_req, tmp, &remove_queue, list) {
-		list_del(&fsf_req->list);
-		zfcp_fsf_req_dismiss(fsf_req);
-	}
-
-	return 0;
+	dbg_tmp[0] = (u64) atomic_read(&adapter->reqs_active);
+	dbg_tmp[1] = (u64) counter;
+	debug_event(adapter->erp_dbf, 4, (void *) dbg_tmp, 16);
+	list_del(&fsf_req->list);
+	fsf_req->status |= ZFCP_STATUS_FSFREQ_DISMISSED;
+	zfcp_fsf_req_complete(fsf_req);
 }
 
-/*
- * function:	
- *
- * purpose:	
- *
- * returns:
+/**
+ * zfcp_fsf_req_dismiss_all - dismiss all remaining fsf requests
  */
-static void
-zfcp_fsf_req_dismiss(struct zfcp_fsf_req *fsf_req)
+int zfcp_fsf_req_dismiss_all(struct zfcp_adapter *adapter)
 {
-	fsf_req->status |= ZFCP_STATUS_FSFREQ_DISMISSED;
-	zfcp_fsf_req_complete(fsf_req);
+	struct zfcp_fsf_req *request, *tmp;
+	unsigned long flags;
+	unsigned int i, counter;
+
+	spin_lock_irqsave(&adapter->req_list_lock, flags);
+	atomic_set(&adapter->reqs_active, 0);
+	for (i=0; i<REQUEST_LIST_SIZE; i++) {
+		if (list_empty(&adapter->req_list[i]))
+			continue;
+
+		counter = 0;
+		list_for_each_entry_safe(request, tmp,
+					 &adapter->req_list[i], list) {
+			zfcp_fsf_req_dismiss(adapter, request, counter);
+			counter++;
+		}
+	}
+	spin_unlock_irqrestore(&adapter->req_list_lock, flags);
+
+	return 0;
 }
 
 /*
@@ -4592,12 +4592,14 @@ static inline void
 zfcp_fsf_req_qtcb_init(struct zfcp_fsf_req *fsf_req)
 {
 	if (likely(fsf_req->qtcb != NULL)) {
-		fsf_req->qtcb->prefix.req_seq_no = fsf_req->adapter->fsf_req_seq_no;
-		fsf_req->qtcb->prefix.req_id = (unsigned long)fsf_req;
+		fsf_req->qtcb->prefix.req_seq_no =
+			fsf_req->adapter->fsf_req_seq_no;
+		fsf_req->qtcb->prefix.req_id = fsf_req->req_id;
 		fsf_req->qtcb->prefix.ulp_info = ZFCP_ULP_INFO_VERSION;
-		fsf_req->qtcb->prefix.qtcb_type = fsf_qtcb_type[fsf_req->fsf_command];
+		fsf_req->qtcb->prefix.qtcb_type =
+			fsf_qtcb_type[fsf_req->fsf_command];
 		fsf_req->qtcb->prefix.qtcb_version = ZFCP_QTCB_VERSION;
-		fsf_req->qtcb->header.req_handle = (unsigned long)fsf_req;
+		fsf_req->qtcb->header.req_handle = fsf_req->req_id;
 		fsf_req->qtcb->header.fsf_command = fsf_req->fsf_command;
 	}
 }
@@ -4654,6 +4656,7 @@ zfcp_fsf_req_create(struct zfcp_adapter *adapter, u32 fsf_cmd, int req_flags,
 {
 	volatile struct qdio_buffer_element *sbale;
 	struct zfcp_fsf_req *fsf_req = NULL;
+	unsigned long flags;
 	int ret = 0;
 	struct zfcp_qdio_queue *req_queue = &adapter->request_queue;
 
@@ -4668,6 +4671,12 @@ zfcp_fsf_req_create(struct zfcp_adapter *adapter, u32 fsf_cmd, int req_flags,
 
 	fsf_req->adapter = adapter;
 	fsf_req->fsf_command = fsf_cmd;
+	INIT_LIST_HEAD(&fsf_req->list);
+	
+	/* unique request id */
+	spin_lock_irqsave(&adapter->req_list_lock, flags);
+	fsf_req->req_id = adapter->req_no++;
+	spin_unlock_irqrestore(&adapter->req_list_lock, flags);
 
         zfcp_fsf_req_qtcb_init(fsf_req);
 
@@ -4707,7 +4716,7 @@ zfcp_fsf_req_create(struct zfcp_adapter *adapter, u32 fsf_cmd, int req_flags,
 	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
 
 	/* setup common SBALE fields */
-	sbale[0].addr = fsf_req;
+	sbale[0].addr = (void *) fsf_req->req_id;
 	sbale[0].flags |= SBAL_FLAGS0_COMMAND;
 	if (likely(fsf_req->qtcb != NULL)) {
 		sbale[1].addr = (void *) fsf_req->qtcb;
@@ -4747,7 +4756,7 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer)
 	volatile struct qdio_buffer_element *sbale;
 	int inc_seq_no;
 	int new_distance_from_int;
-	unsigned long flags;
+	u64 dbg_tmp[2];
 	int retval = 0;
 
 	adapter = fsf_req->adapter;
@@ -4761,10 +4770,10 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer)
 	ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_TRACE, (char *) sbale[1].addr,
 		      sbale[1].length);
 
-	/* put allocated FSF request at list tail */
-	spin_lock_irqsave(&adapter->fsf_req_list_lock, flags);
-	list_add_tail(&fsf_req->list, &adapter->fsf_req_list_head);
-	spin_unlock_irqrestore(&adapter->fsf_req_list_lock, flags);
+	/* put allocated FSF request into hash table */
+	spin_lock(&adapter->req_list_lock);
+	zfcp_reqlist_add(adapter, fsf_req);
+	spin_unlock(&adapter->req_list_lock);
 
 	inc_seq_no = (fsf_req->qtcb != NULL);
 
@@ -4803,6 +4812,10 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer)
 			 QDIO_FLAG_SYNC_OUTPUT,
 			 0, fsf_req->sbal_first, fsf_req->sbal_number, NULL);
 
+	dbg_tmp[0] = (unsigned long) sbale[0].addr;
+	dbg_tmp[1] = (u64) retval;
+	debug_event(adapter->erp_dbf, 4, (void *) dbg_tmp, 16);
+
 	if (unlikely(retval)) {
 		/* Queues are down..... */
 		retval = -EIO;
@@ -4812,22 +4825,17 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer)
 		 */
 		if (timer)
 			del_timer(timer);
-		spin_lock_irqsave(&adapter->fsf_req_list_lock, flags);
-		list_del(&fsf_req->list);
-		spin_unlock_irqrestore(&adapter->fsf_req_list_lock, flags);
-		/*
-		 * adjust the number of free SBALs in request queue as well as
-		 * position of first one
-		 */
+		spin_lock(&adapter->req_list_lock);
+		zfcp_reqlist_remove(adapter, fsf_req->req_id);
+		spin_unlock(&adapter->req_list_lock);
+		/* undo changes in request queue made for this request */
 		zfcp_qdio_zero_sbals(req_queue->buffer,
 				     fsf_req->sbal_first, fsf_req->sbal_number);
 		atomic_add(fsf_req->sbal_number, &req_queue->free_count);
-		req_queue->free_index -= fsf_req->sbal_number;	 /* increase */
+		req_queue->free_index -= fsf_req->sbal_number;
 		req_queue->free_index += QDIO_MAX_BUFFERS_PER_Q;
 		req_queue->free_index %= QDIO_MAX_BUFFERS_PER_Q; /* wrap */
-		ZFCP_LOG_DEBUG
-			("error: do_QDIO failed. Buffers could not be enqueued "
-			 "to request queue.\n");
+		zfcp_erp_adapter_reopen(adapter, 0);
 	} else {
 		req_queue->distance_from_int = new_distance_from_int;
 		/*
@@ -4843,7 +4851,7 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer)
 			adapter->fsf_req_seq_no++;
 
 		/* count FSF requests pending */
-		atomic_inc(&adapter->fsf_reqs_active);
+		atomic_inc(&adapter->reqs_active);
 	}
 	return retval;
 }
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 49ea5ad..dbd9f48 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -282,6 +282,37 @@ zfcp_qdio_request_handler(struct ccw_device *ccw_device,
 	return;
 }
 
+/**
+ * zfcp_qdio_reqid_check - checks for valid reqids or unsolicited status
+ */
+static int zfcp_qdio_reqid_check(struct zfcp_adapter *adapter, 
+				 unsigned long req_id)
+{
+	struct zfcp_fsf_req *fsf_req;
+	unsigned long flags;
+
+	debug_long_event(adapter->erp_dbf, 4, req_id);
+
+	spin_lock_irqsave(&adapter->req_list_lock, flags);
+	fsf_req = zfcp_reqlist_ismember(adapter, req_id);
+
+	if (!fsf_req) {
+		spin_unlock_irqrestore(&adapter->req_list_lock, flags);
+		ZFCP_LOG_NORMAL("error: unknown request id (%ld).\n", req_id);
+		zfcp_erp_adapter_reopen(adapter, 0);
+		return -EINVAL;
+	}
+
+	zfcp_reqlist_remove(adapter, req_id);
+	atomic_dec(&adapter->reqs_active);
+	spin_unlock_irqrestore(&adapter->req_list_lock, flags);
+
+	/* finish the FSF request */
+	zfcp_fsf_req_complete(fsf_req);
+
+	return 0;
+}
+
 /*
  * function:   	zfcp_qdio_response_handler
  *
@@ -344,7 +375,7 @@ zfcp_qdio_response_handler(struct ccw_device *ccw_device,
 			/* look for QDIO request identifiers in SB */
 			buffere = &buffer->element[buffere_index];
 			retval = zfcp_qdio_reqid_check(adapter,
-						       (void *) buffere->addr);
+					(unsigned long) buffere->addr);
 
 			if (retval) {
 				ZFCP_LOG_NORMAL("bug: unexpected inbound "
@@ -415,52 +446,6 @@ zfcp_qdio_response_handler(struct ccw_device *ccw_device,
 	return;
 }
 
-/*
- * function:	zfcp_qdio_reqid_check
- *
- * purpose:	checks for valid reqids or unsolicited status
- *
- * returns:	0 - valid request id or unsolicited status
- *		!0 - otherwise
- */
-int
-zfcp_qdio_reqid_check(struct zfcp_adapter *adapter, void *sbale_addr)
-{
-	struct zfcp_fsf_req *fsf_req;
-	unsigned long flags;
-
-	/* invalid (per convention used in this driver) */
-	if (unlikely(!sbale_addr)) {
-		ZFCP_LOG_NORMAL("bug: invalid reqid\n");
-		return -EINVAL;
-	}
-
-	/* valid request id and thus (hopefully :) valid fsf_req address */
-	fsf_req = (struct zfcp_fsf_req *) sbale_addr;
-
-	/* serialize with zfcp_fsf_req_dismiss_all */
-	spin_lock_irqsave(&adapter->fsf_req_list_lock, flags);
-	if (list_empty(&adapter->fsf_req_list_head)) {
-		spin_unlock_irqrestore(&adapter->fsf_req_list_lock, flags);
-		return 0;
-	}
-	list_del(&fsf_req->list);
-	atomic_dec(&adapter->fsf_reqs_active);
-	spin_unlock_irqrestore(&adapter->fsf_req_list_lock, flags);
-
-	if (unlikely(adapter != fsf_req->adapter)) {
-		ZFCP_LOG_NORMAL("bug: invalid reqid (fsf_req=%p, "
-				"fsf_req->adapter=%p, adapter=%p)\n",
-				fsf_req, fsf_req->adapter, adapter);
-		return -EINVAL;
-	}
-
-	/* finish the FSF request */
-	zfcp_fsf_req_complete(fsf_req);
-
-	return 0;
-}
-
 /**
  * zfcp_qdio_sbale_get - return pointer to SBALE of qdio_queue
  * @queue: queue from which SBALE should be returned
-- 
cgit v0.10.2


From f6c0e7a7b3b6db15146877c0cef43b413af5b76e Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <aherrman@de.ibm.com>
Date: Wed, 2 Aug 2006 11:05:52 +0200
Subject: [SCSI] zfcp: minor erp bug fixes

Bug fixes for zfcp's erp:
- trigger adapter reopen if do_QDIO fails
- avoid erp deadlock if registration of scsi target or remote port hangs
- do not treat as error if exchange port data fails
- decrease timeout for target reset and aborts
- mark unit failed if slave_destroy is called

Additionally some code cleanup was done:
- made some functions void when retval is not of interest
- shortened initialization of zfcp's host_template
- corrected some comments

Signed-off-by: Andreas Herrmann <aherrman@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 72293f3..904d6a7 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -80,7 +80,7 @@ zfcp_address_to_sg(void *address, struct scatterlist *list)
 #define REQUEST_LIST_SIZE 128
 
 /********************* SCSI SPECIFIC DEFINES *********************************/
-#define ZFCP_SCSI_ER_TIMEOUT                    (100*HZ)
+#define ZFCP_SCSI_ER_TIMEOUT                    (10*HZ)
 
 /********************* CIO/QDIO SPECIFIC DEFINES *****************************/
 
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index f74412b..7f60b6f 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -64,8 +64,8 @@ static int zfcp_erp_strategy_check_action(struct zfcp_erp_action *, int);
 static int zfcp_erp_adapter_strategy(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_generic(struct zfcp_erp_action *, int);
 static int zfcp_erp_adapter_strategy_close(struct zfcp_erp_action *);
-static int zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *);
-static int zfcp_erp_adapter_strategy_close_fsf(struct zfcp_erp_action *);
+static void zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *);
+static void zfcp_erp_adapter_strategy_close_fsf(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_open_qdio(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *);
@@ -93,10 +93,9 @@ static int zfcp_erp_unit_strategy_clearstati(struct zfcp_unit *);
 static int zfcp_erp_unit_strategy_close(struct zfcp_erp_action *);
 static int zfcp_erp_unit_strategy_open(struct zfcp_erp_action *);
 
-static int zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *);
-static int zfcp_erp_action_dismiss_port(struct zfcp_port *);
-static int zfcp_erp_action_dismiss_unit(struct zfcp_unit *);
-static int zfcp_erp_action_dismiss(struct zfcp_erp_action *);
+static void zfcp_erp_action_dismiss_port(struct zfcp_port *);
+static void zfcp_erp_action_dismiss_unit(struct zfcp_unit *);
+static void zfcp_erp_action_dismiss(struct zfcp_erp_action *);
 
 static int zfcp_erp_action_enqueue(int, struct zfcp_adapter *,
 				   struct zfcp_port *, struct zfcp_unit *);
@@ -135,29 +134,39 @@ zfcp_fsf_request_timeout_handler(unsigned long data)
 	zfcp_erp_adapter_reopen(adapter, 0);
 }
 
-/*
- * function:	zfcp_fsf_scsi_er_timeout_handler
- *
- * purpose:     This function needs to be called whenever a SCSI error recovery
- *              action (abort/reset) does not return.
- *              Re-opening the adapter means that the command can be returned
- *              by zfcp (it is guarranteed that it does not return via the
- *              adapter anymore). The buffer can then be used again.
- *    
- * returns:     sod all
+/**
+ * zfcp_fsf_scsi_er_timeout_handler - timeout handler for scsi eh tasks
+ *
+ * This function needs to be called whenever a SCSI error recovery
+ * action (abort/reset) does not return.  Re-opening the adapter means
+ * that the abort/reset command can be returned by zfcp. It won't complete
+ * via the adapter anymore (because qdio queues are closed). If ERP is
+ * already running on this adapter it will be stopped.
  */
-void
-zfcp_fsf_scsi_er_timeout_handler(unsigned long data)
+void zfcp_fsf_scsi_er_timeout_handler(unsigned long data)
 {
 	struct zfcp_adapter *adapter = (struct zfcp_adapter *) data;
+	unsigned long flags;
 
 	ZFCP_LOG_NORMAL("warning: SCSI error recovery timed out. "
 			"Restarting all operations on the adapter %s\n",
 			zfcp_get_busid_by_adapter(adapter));
 	debug_text_event(adapter->erp_dbf, 1, "eh_lmem_tout");
-	zfcp_erp_adapter_reopen(adapter, 0);
 
-	return;
+	write_lock_irqsave(&adapter->erp_lock, flags);
+	if (atomic_test_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING,
+			     &adapter->status)) {
+		zfcp_erp_modify_adapter_status(adapter,
+		       ZFCP_STATUS_COMMON_UNBLOCKED|ZFCP_STATUS_COMMON_OPEN,
+		       ZFCP_CLEAR);
+		zfcp_erp_action_dismiss_adapter(adapter);
+		write_unlock_irqrestore(&adapter->erp_lock, flags);
+		/* dismiss all pending requests including requests for ERP */
+		zfcp_fsf_req_dismiss_all(adapter);
+		adapter->fsf_req_seq_no = 0;
+	} else
+		write_unlock_irqrestore(&adapter->erp_lock, flags);
+	zfcp_erp_adapter_reopen(adapter, 0);
 }
 
 /*
@@ -670,17 +679,10 @@ zfcp_erp_unit_reopen(struct zfcp_unit *unit, int clear_mask)
 	return retval;
 }
 
-/*
- * function:	
- *
- * purpose:	disable I/O,
- *		return any open requests and clean them up,
- *		aim: no pending and incoming I/O
- *
- * returns:
+/**
+ * zfcp_erp_adapter_block - mark adapter as blocked, block scsi requests
  */
-static void
-zfcp_erp_adapter_block(struct zfcp_adapter *adapter, int clear_mask)
+static void zfcp_erp_adapter_block(struct zfcp_adapter *adapter, int clear_mask)
 {
 	debug_text_event(adapter->erp_dbf, 6, "a_bl");
 	zfcp_erp_modify_adapter_status(adapter,
@@ -688,15 +690,10 @@ zfcp_erp_adapter_block(struct zfcp_adapter *adapter, int clear_mask)
 				       clear_mask, ZFCP_CLEAR);
 }
 
-/*
- * function:	
- *
- * purpose:	enable I/O
- *
- * returns:
+/**
+ * zfcp_erp_adapter_unblock - mark adapter as unblocked, allow scsi requests
  */
-static void
-zfcp_erp_adapter_unblock(struct zfcp_adapter *adapter)
+static void zfcp_erp_adapter_unblock(struct zfcp_adapter *adapter)
 {
 	debug_text_event(adapter->erp_dbf, 6, "a_ubl");
 	atomic_set_mask(ZFCP_STATUS_COMMON_UNBLOCKED, &adapter->status);
@@ -897,23 +894,15 @@ zfcp_erp_strategy_check_fsfreq(struct zfcp_erp_action *erp_action)
 	return retval;
 }
 
-/*
- * purpose:	generic handler for asynchronous events related to erp_action events
- *		(normal completion, time-out, dismissing, retry after
- *		low memory condition)
- *
- * note:	deletion of timer is not required (e.g. in case of a time-out),
- *		but a second try does no harm,
- *		we leave it in here to allow for greater simplification
+/**
+ * zfcp_erp_async_handler_nolock - complete erp_action
  *
- * returns:	0 - there was an action to handle
- *		!0 - otherwise
+ * Used for normal completion, time-out, dismissal and failure after
+ * low memory condition.
  */
-static int
-zfcp_erp_async_handler_nolock(struct zfcp_erp_action *erp_action,
-			      unsigned long set_mask)
+static void zfcp_erp_async_handler_nolock(struct zfcp_erp_action *erp_action,
+					  unsigned long set_mask)
 {
-	int retval;
 	struct zfcp_adapter *adapter = erp_action->adapter;
 
 	if (zfcp_erp_action_exists(erp_action) == ZFCP_ERP_ACTION_RUNNING) {
@@ -924,43 +913,26 @@ zfcp_erp_async_handler_nolock(struct zfcp_erp_action *erp_action,
 			del_timer(&erp_action->timer);
 		erp_action->status |= set_mask;
 		zfcp_erp_action_ready(erp_action);
-		retval = 0;
 	} else {
 		/* action is ready or gone - nothing to do */
 		debug_text_event(adapter->erp_dbf, 3, "a_asyh_gone");
 		debug_event(adapter->erp_dbf, 3, &erp_action->action,
 			    sizeof (int));
-		retval = 1;
 	}
-
-	return retval;
 }
 
-/*
- * purpose:	generic handler for asynchronous events related to erp_action
- *               events	(normal completion, time-out, dismissing, retry after
- *		low memory condition)
- *
- * note:	deletion of timer is not required (e.g. in case of a time-out),
- *		but a second try does no harm,
- *		we leave it in here to allow for greater simplification
- *
- * returns:	0 - there was an action to handle
- *		!0 - otherwise
+/**
+ * zfcp_erp_async_handler - wrapper for erp_async_handler_nolock w/ locking
  */
-int
-zfcp_erp_async_handler(struct zfcp_erp_action *erp_action,
-		       unsigned long set_mask)
+void zfcp_erp_async_handler(struct zfcp_erp_action *erp_action,
+			    unsigned long set_mask)
 {
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	unsigned long flags;
-	int retval;
 
 	write_lock_irqsave(&adapter->erp_lock, flags);
-	retval = zfcp_erp_async_handler_nolock(erp_action, set_mask);
+	zfcp_erp_async_handler_nolock(erp_action, set_mask);
 	write_unlock_irqrestore(&adapter->erp_lock, flags);
-
-	return retval;
 }
 
 /*
@@ -997,17 +969,15 @@ zfcp_erp_timeout_handler(unsigned long data)
 	zfcp_erp_async_handler(erp_action, ZFCP_STATUS_ERP_TIMEDOUT);
 }
 
-/*
- * purpose:	is called for an erp_action which needs to be ended
- *		though not being done,
- *		this is usually required if an higher is generated,
- *		action gets an appropriate flag and will be processed
- *		accordingly
+/**
+ * zfcp_erp_action_dismiss - dismiss an erp_action
  *
- * locks:	erp_lock held (thus we need to call another handler variant)
+ * adapter->erp_lock must be held
+ * 
+ * Dismissal of an erp_action is usually required if an erp_action of
+ * higher priority is generated.
  */
-static int
-zfcp_erp_action_dismiss(struct zfcp_erp_action *erp_action)
+static void zfcp_erp_action_dismiss(struct zfcp_erp_action *erp_action)
 {
 	struct zfcp_adapter *adapter = erp_action->adapter;
 
@@ -1015,8 +985,6 @@ zfcp_erp_action_dismiss(struct zfcp_erp_action *erp_action)
 	debug_event(adapter->erp_dbf, 2, &erp_action->action, sizeof (int));
 
 	zfcp_erp_async_handler_nolock(erp_action, ZFCP_STATUS_ERP_DISMISSED);
-
-	return 0;
 }
 
 int
@@ -2072,18 +2040,12 @@ zfcp_erp_adapter_strategy_open_qdio(struct zfcp_erp_action *erp_action)
 	return retval;
 }
 
-/*
- * function:    zfcp_qdio_cleanup
- *
- * purpose:	cleans up QDIO operation for the specified adapter
- *
- * returns:	0 - successful cleanup
- *		!0 - failed cleanup
+/**
+ * zfcp_erp_adapter_strategy_close_qdio - close qdio queues for an adapter
  */
-int
+static void
 zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *erp_action)
 {
-	int retval = ZFCP_ERP_SUCCEEDED;
 	int first_used;
 	int used_count;
 	struct zfcp_adapter *adapter = erp_action->adapter;
@@ -2092,15 +2054,13 @@ zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *erp_action)
 		ZFCP_LOG_DEBUG("error: attempt to shut down inactive QDIO "
 			       "queues on adapter %s\n",
 			       zfcp_get_busid_by_adapter(adapter));
-		retval = ZFCP_ERP_FAILED;
-		goto out;
+		return;
 	}
 
 	/*
 	 * Get queue_lock and clear QDIOUP flag. Thus it's guaranteed that
 	 * do_QDIO won't be called while qdio_shutdown is in progress.
 	 */
-
 	write_lock_irq(&adapter->request_queue.queue_lock);
 	atomic_clear_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status);
 	write_unlock_irq(&adapter->request_queue.queue_lock);
@@ -2132,8 +2092,6 @@ zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *erp_action)
 	adapter->request_queue.free_index = 0;
 	atomic_set(&adapter->request_queue.free_count, 0);
 	adapter->request_queue.distance_from_int = 0;
- out:
-	return retval;
 }
 
 static int
@@ -2256,11 +2214,11 @@ zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *erp_action)
 			      "%s)\n", zfcp_get_busid_by_adapter(adapter));
 		ret = ZFCP_ERP_FAILED;
 	}
-	if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status)) {
-		ZFCP_LOG_INFO("error: exchange port data failed (adapter "
+
+	/* don't treat as error for the sake of compatibility */
+	if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status))
+		ZFCP_LOG_INFO("warning: exchange port data failed (adapter "
 			      "%s\n", zfcp_get_busid_by_adapter(adapter));
-		ret = ZFCP_ERP_FAILED;
-	}
 
 	return ret;
 }
@@ -2290,18 +2248,12 @@ zfcp_erp_adapter_strategy_open_fsf_statusread(struct zfcp_erp_action
 	return retval;
 }
 
-/*
- * function:    zfcp_fsf_cleanup
- *
- * purpose:	cleanup FSF operation for specified adapter
- *
- * returns:	0 - FSF operation successfully cleaned up
- *		!0 - failed to cleanup FSF operation for this adapter
+/**
+ * zfcp_erp_adapter_strategy_close_fsf - stop FSF operations for an adapter
  */
-static int
+static void
 zfcp_erp_adapter_strategy_close_fsf(struct zfcp_erp_action *erp_action)
 {
-	int retval = ZFCP_ERP_SUCCEEDED;
 	struct zfcp_adapter *adapter = erp_action->adapter;
 
 	/*
@@ -2315,8 +2267,6 @@ zfcp_erp_adapter_strategy_close_fsf(struct zfcp_erp_action *erp_action)
 	/* all ports and units are closed */
 	zfcp_erp_modify_adapter_status(adapter,
 				       ZFCP_STATUS_COMMON_OPEN, ZFCP_CLEAR);
-
-	return retval;
 }
 
 /*
@@ -3291,10 +3241,8 @@ zfcp_erp_action_cleanup(int action, struct zfcp_adapter *adapter,
 }
 
 
-static int
-zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
+void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
 {
-	int retval = 0;
 	struct zfcp_port *port;
 
 	debug_text_event(adapter->erp_dbf, 5, "a_actab");
@@ -3303,14 +3251,10 @@ zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
 	else
 		list_for_each_entry(port, &adapter->port_list_head, list)
 		    zfcp_erp_action_dismiss_port(port);
-
-	return retval;
 }
 
-static int
-zfcp_erp_action_dismiss_port(struct zfcp_port *port)
+static void zfcp_erp_action_dismiss_port(struct zfcp_port *port)
 {
-	int retval = 0;
 	struct zfcp_unit *unit;
 	struct zfcp_adapter *adapter = port->adapter;
 
@@ -3321,22 +3265,16 @@ zfcp_erp_action_dismiss_port(struct zfcp_port *port)
 	else
 		list_for_each_entry(unit, &port->unit_list_head, list)
 		    zfcp_erp_action_dismiss_unit(unit);
-
-	return retval;
 }
 
-static int
-zfcp_erp_action_dismiss_unit(struct zfcp_unit *unit)
+static void zfcp_erp_action_dismiss_unit(struct zfcp_unit *unit)
 {
-	int retval = 0;
 	struct zfcp_adapter *adapter = unit->port->adapter;
 
 	debug_text_event(adapter->erp_dbf, 5, "u_actab");
 	debug_event(adapter->erp_dbf, 5, &unit->fcp_lun, sizeof (fcp_lun_t));
 	if (atomic_test_mask(ZFCP_STATUS_COMMON_ERP_INUSE, &unit->status))
 		zfcp_erp_action_dismiss(&unit->erp_action);
-
-	return retval;
 }
 
 static inline void
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index 04bb3a9..146d7a2 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -139,6 +139,7 @@ extern void zfcp_erp_modify_adapter_status(struct zfcp_adapter *, u32, int);
 extern int  zfcp_erp_adapter_reopen(struct zfcp_adapter *, int);
 extern int  zfcp_erp_adapter_shutdown(struct zfcp_adapter *, int);
 extern void zfcp_erp_adapter_failed(struct zfcp_adapter *);
+extern void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *);
 
 extern void zfcp_erp_modify_port_status(struct zfcp_port *, u32, int);
 extern int  zfcp_erp_port_reopen(struct zfcp_port *, int);
@@ -155,7 +156,7 @@ extern void zfcp_erp_unit_failed(struct zfcp_unit *);
 extern int  zfcp_erp_thread_setup(struct zfcp_adapter *);
 extern int  zfcp_erp_thread_kill(struct zfcp_adapter *);
 extern int  zfcp_erp_wait(struct zfcp_adapter *);
-extern int  zfcp_erp_async_handler(struct zfcp_erp_action *, unsigned long);
+extern void zfcp_erp_async_handler(struct zfcp_erp_action *, unsigned long);
 
 extern int  zfcp_test_link(struct zfcp_port *);
 
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 671f4a6..1bb5508 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -30,7 +30,6 @@ static int zfcp_scsi_queuecommand(struct scsi_cmnd *,
 				  void (*done) (struct scsi_cmnd *));
 static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *);
 static int zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *);
-static int zfcp_scsi_eh_bus_reset_handler(struct scsi_cmnd *);
 static int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *);
 static int zfcp_task_management_function(struct zfcp_unit *, u8,
 					 struct scsi_cmnd *);
@@ -46,30 +45,22 @@ struct zfcp_data zfcp_data = {
 	.scsi_host_template = {
 		.name			= ZFCP_NAME,
 		.proc_name		= "zfcp",
-		.proc_info		= NULL,
-		.detect			= NULL,
 		.slave_alloc		= zfcp_scsi_slave_alloc,
 		.slave_configure	= zfcp_scsi_slave_configure,
 		.slave_destroy		= zfcp_scsi_slave_destroy,
 		.queuecommand		= zfcp_scsi_queuecommand,
 		.eh_abort_handler	= zfcp_scsi_eh_abort_handler,
 		.eh_device_reset_handler = zfcp_scsi_eh_device_reset_handler,
-		.eh_bus_reset_handler	= zfcp_scsi_eh_bus_reset_handler,
+		.eh_bus_reset_handler	= zfcp_scsi_eh_host_reset_handler,
 		.eh_host_reset_handler	= zfcp_scsi_eh_host_reset_handler,
 		.can_queue		= 4096,
 		.this_id		= -1,
-		/*
-		 * FIXME:
-		 * one less? can zfcp_create_sbale cope with it?
-		 */
 		.sg_tablesize		= ZFCP_MAX_SBALES_PER_REQ,
 		.cmd_per_lun		= 1,
-		.unchecked_isa_dma	= 0,
 		.use_clustering		= 1,
 		.sdev_attrs		= zfcp_sysfs_sdev_attrs,
 	},
 	.driver_version = ZFCP_VERSION,
-	/* rest initialised with zeros */
 };
 
 /* Find start of Response Information in FCP response unit*/
@@ -176,8 +167,14 @@ zfcp_scsi_slave_alloc(struct scsi_device *sdp)
 	return retval;
 }
 
-static void
-zfcp_scsi_slave_destroy(struct scsi_device *sdpnt)
+/**
+ * zfcp_scsi_slave_destroy - called when scsi device is removed
+ *
+ * Remove reference to associated scsi device for an zfcp_unit.
+ * Mark zfcp_unit as failed. The scsi device might be deleted via sysfs
+ * or a scan for this device might have failed.
+ */
+static void zfcp_scsi_slave_destroy(struct scsi_device *sdpnt)
 {
 	struct zfcp_unit *unit = (struct zfcp_unit *) sdpnt->hostdata;
 
@@ -185,6 +182,7 @@ zfcp_scsi_slave_destroy(struct scsi_device *sdpnt)
 		atomic_clear_mask(ZFCP_STATUS_UNIT_REGISTERED, &unit->status);
 		sdpnt->hostdata = NULL;
 		unit->device = NULL;
+		zfcp_erp_unit_failed(unit);
 		zfcp_unit_put(unit);
 	} else {
 		ZFCP_LOG_NORMAL("bug: no unit associated with SCSI device at "
@@ -549,35 +547,38 @@ zfcp_task_management_function(struct zfcp_unit *unit, u8 tm_flags,
 }
 
 /**
- * zfcp_scsi_eh_bus_reset_handler - reset bus (reopen adapter)
+ * zfcp_scsi_eh_host_reset_handler - handler for host and bus reset
+ *
+ * If ERP is already running it will be stopped.
  */
-int
-zfcp_scsi_eh_bus_reset_handler(struct scsi_cmnd *scpnt)
+int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt)
 {
-	struct zfcp_unit *unit = (struct zfcp_unit*) scpnt->device->hostdata;
-	struct zfcp_adapter *adapter = unit->port->adapter;
-
-	ZFCP_LOG_NORMAL("bus reset because of problems with "
-			"unit 0x%016Lx\n", unit->fcp_lun);
-	zfcp_erp_adapter_reopen(adapter, 0);
-	zfcp_erp_wait(adapter);
-
-	return SUCCESS;
-}
+	struct zfcp_unit *unit;
+	struct zfcp_adapter *adapter;
+	unsigned long flags;
 
-/**
- * zfcp_scsi_eh_host_reset_handler - reset host (reopen adapter)
- */
-int
-zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt)
-{
-	struct zfcp_unit *unit = (struct zfcp_unit*) scpnt->device->hostdata;
-	struct zfcp_adapter *adapter = unit->port->adapter;
+	unit = (struct zfcp_unit*) scpnt->device->hostdata;
+	adapter = unit->port->adapter;
 
-	ZFCP_LOG_NORMAL("host reset because of problems with "
+	ZFCP_LOG_NORMAL("host/bus reset because of problems with "
 			"unit 0x%016Lx\n", unit->fcp_lun);
-	zfcp_erp_adapter_reopen(adapter, 0);
-	zfcp_erp_wait(adapter);
+
+	write_lock_irqsave(&adapter->erp_lock, flags);
+	if (atomic_test_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING,
+			     &adapter->status)) {
+		zfcp_erp_modify_adapter_status(adapter,
+		       ZFCP_STATUS_COMMON_UNBLOCKED|ZFCP_STATUS_COMMON_OPEN,
+		       ZFCP_CLEAR);
+		zfcp_erp_action_dismiss_adapter(adapter);
+		write_unlock_irqrestore(&adapter->erp_lock, flags);
+		zfcp_fsf_req_dismiss_all(adapter);
+		adapter->fsf_req_seq_no = 0;
+		zfcp_erp_adapter_reopen(adapter, 0);
+	} else {
+		write_unlock_irqrestore(&adapter->erp_lock, flags);
+		zfcp_erp_adapter_reopen(adapter, 0);
+		zfcp_erp_wait(adapter);
+	}
 
 	return SUCCESS;
 }
-- 
cgit v0.10.2


From 58b3ac07fed31ffc1349380b78305af6522fe1f4 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <aherrman@de.ibm.com>
Date: Wed, 2 Aug 2006 11:06:21 +0200
Subject: [SCSI] zfcp: bump version number

New version number for zfcp driver.

Signed-off-by: Andreas Herrmann <aherrman@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 904d6a7..94d1b74d 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -52,7 +52,7 @@
 /********************* GENERAL DEFINES *********************************/
 
 /* zfcp version number, it consists of major, minor, and patch-level number */
-#define ZFCP_VERSION		"4.7.0"
+#define ZFCP_VERSION		"4.8.0"
 
 /**
  * zfcp_sg_to_address - determine kernel address from struct scatterlist
-- 
cgit v0.10.2


From 3e74051bc7b780c5ba28939f9d5c4cd3280a5ff7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 30 Jul 2006 19:13:36 +0200
Subject: [SCSI] hptiop: backout ioctl mess

The hptiop just got merged with a horrible amount of really bad ioctl
code that is against the standards for new scsi drivers.  This patch
backs it out (and fixes a small bug where scsi_add_host is called too
early).  We can re-add proper APIs once we agree on them.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c
index 74d4d22..bcb3444 100644
--- a/drivers/scsi/hptiop.c
+++ b/drivers/scsi/hptiop.c
@@ -45,10 +45,6 @@ static char driver_name[] = "hptiop";
 static const char driver_name_long[] = "RocketRAID 3xxx SATA Controller driver";
 static const char driver_ver[] = "v1.0 (060426)";
 
-static DEFINE_SPINLOCK(hptiop_hba_list_lock);
-static LIST_HEAD(hptiop_hba_list);
-static int hptiop_cdev_major = -1;
-
 static void hptiop_host_request_callback(struct hptiop_hba *hba, u32 tag);
 static void hptiop_iop_request_callback(struct hptiop_hba *hba, u32 tag);
 static void hptiop_message_callback(struct hptiop_hba *hba, u32 msg);
@@ -620,532 +616,11 @@ static int hptiop_adjust_disk_queue_depth(struct scsi_device *sdev,
 	return queue_depth;
 }
 
-struct hptiop_getinfo {
-	char __user *buffer;
-	loff_t buflength;
-	loff_t bufoffset;
-	loff_t buffillen;
-	loff_t filpos;
-};
-
-static void hptiop_copy_mem_info(struct hptiop_getinfo *pinfo,
-					char *data, int datalen)
-{
-	if (pinfo->filpos < pinfo->bufoffset) {
-		if (pinfo->filpos + datalen <= pinfo->bufoffset) {
-			pinfo->filpos += datalen;
-			return;
-		} else {
-			data += (pinfo->bufoffset - pinfo->filpos);
-			datalen  -= (pinfo->bufoffset - pinfo->filpos);
-			pinfo->filpos = pinfo->bufoffset;
-		}
-	}
-
-	pinfo->filpos += datalen;
-	if (pinfo->buffillen == pinfo->buflength)
-		return;
-
-	if (pinfo->buflength - pinfo->buffillen < datalen)
-		datalen = pinfo->buflength - pinfo->buffillen;
-
-	if (copy_to_user(pinfo->buffer + pinfo->buffillen, data, datalen))
-		return;
-
-	pinfo->buffillen += datalen;
-}
-
-static int hptiop_copy_info(struct hptiop_getinfo *pinfo, char *fmt, ...)
-{
-	va_list args;
-	char buf[128];
-	int len;
-
-	va_start(args, fmt);
-	len = vsnprintf(buf, sizeof(buf), fmt, args);
-	va_end(args);
-	hptiop_copy_mem_info(pinfo, buf, len);
-	return len;
-}
-
-static void hptiop_ioctl_done(struct hpt_ioctl_k *arg)
-{
-	arg->done = NULL;
-	wake_up(&arg->hba->ioctl_wq);
-}
-
-static void hptiop_do_ioctl(struct hpt_ioctl_k *arg)
-{
-	struct hptiop_hba *hba = arg->hba;
-	u32 val;
-	struct hpt_iop_request_ioctl_command __iomem *req;
-	int ioctl_retry = 0;
-
-	dprintk("scsi%d: hptiop_do_ioctl\n", hba->host->host_no);
-
-	/*
-	 * check (in + out) buff size from application.
-	 * outbuf must be dword aligned.
-	 */
-	if (((arg->inbuf_size + 3) & ~3) + arg->outbuf_size >
-			hba->max_request_size
-				- sizeof(struct hpt_iop_request_header)
-				- 4 * sizeof(u32)) {
-		dprintk("scsi%d: ioctl buf size (%d/%d) is too large\n",
-				hba->host->host_no,
-				arg->inbuf_size, arg->outbuf_size);
-		arg->result = HPT_IOCTL_RESULT_FAILED;
-		return;
-	}
-
-retry:
-	spin_lock_irq(hba->host->host_lock);
-
-	val = readl(&hba->iop->inbound_queue);
-	if (val == IOPMU_QUEUE_EMPTY) {
-		spin_unlock_irq(hba->host->host_lock);
-		dprintk("scsi%d: no free req for ioctl\n", hba->host->host_no);
-		arg->result = -1;
-		return;
-	}
-
-	req = (struct hpt_iop_request_ioctl_command __iomem *)
-			((unsigned long)hba->iop + val);
-
-	writel(HPT_CTL_CODE_LINUX_TO_IOP(arg->ioctl_code),
-			&req->ioctl_code);
-	writel(arg->inbuf_size, &req->inbuf_size);
-	writel(arg->outbuf_size, &req->outbuf_size);
-
-	/*
-	 * use the buffer on the IOP local memory first, then copy it
-	 * back to host.
-	 * the caller's request buffer shoudl be little-endian.
-	 */
-	if (arg->inbuf_size)
-		memcpy_toio(req->buf, arg->inbuf, arg->inbuf_size);
-
-	/* correct the controller ID for IOP */
-	if ((arg->ioctl_code == HPT_IOCTL_GET_CHANNEL_INFO ||
-		arg->ioctl_code == HPT_IOCTL_GET_CONTROLLER_INFO_V2 ||
-		arg->ioctl_code == HPT_IOCTL_GET_CONTROLLER_INFO)
-		&& arg->inbuf_size >= sizeof(u32))
-		writel(0, req->buf);
-
-	writel(IOP_REQUEST_TYPE_IOCTL_COMMAND, &req->header.type);
-	writel(0, &req->header.flags);
-	writel(offsetof(struct hpt_iop_request_ioctl_command, buf)
-			+ arg->inbuf_size, &req->header.size);
-	writel((u32)(unsigned long)arg, &req->header.context);
-	writel(BITS_PER_LONG > 32 ? (u32)((unsigned long)arg>>32) : 0,
-			&req->header.context_hi32);
-	writel(IOP_RESULT_PENDING, &req->header.result);
-
-	arg->result = HPT_IOCTL_RESULT_FAILED;
-	arg->done = hptiop_ioctl_done;
-
-	writel(val, &hba->iop->inbound_queue);
-	hptiop_pci_posting_flush(hba->iop);
-
-	spin_unlock_irq(hba->host->host_lock);
-
-	wait_event_timeout(hba->ioctl_wq, arg->done == NULL, 60 * HZ);
-
-	if (arg->done != NULL) {
-		hptiop_reset_hba(hba);
-		if (ioctl_retry++ < 3)
-			goto retry;
-	}
-
-	dprintk("hpt_iop_ioctl %x result %d\n",
-			arg->ioctl_code, arg->result);
-}
-
-static int __hpt_do_ioctl(struct hptiop_hba *hba, u32 code, void *inbuf,
-			u32 insize, void *outbuf, u32 outsize)
-{
-	struct hpt_ioctl_k arg;
-	arg.hba = hba;
-	arg.ioctl_code = code;
-	arg.inbuf = inbuf;
-	arg.outbuf = outbuf;
-	arg.inbuf_size = insize;
-	arg.outbuf_size = outsize;
-	arg.bytes_returned = NULL;
-	hptiop_do_ioctl(&arg);
-	return arg.result;
-}
-
-static inline int hpt_id_valid(__le32 id)
-{
-	return id != 0 && id != cpu_to_le32(0xffffffff);
-}
-
-static int hptiop_get_controller_info(struct hptiop_hba *hba,
-					struct hpt_controller_info *pinfo)
-{
-	int id = 0;
-
-	return __hpt_do_ioctl(hba, HPT_IOCTL_GET_CONTROLLER_INFO,
-		&id, sizeof(int), pinfo, sizeof(*pinfo));
-}
-
-
-static int hptiop_get_channel_info(struct hptiop_hba *hba, int bus,
-					struct hpt_channel_info *pinfo)
-{
-	u32 ids[2];
-
-	ids[0] = 0;
-	ids[1] = bus;
-	return __hpt_do_ioctl(hba, HPT_IOCTL_GET_CHANNEL_INFO,
-				ids, sizeof(ids), pinfo, sizeof(*pinfo));
-
-}
-
-static int hptiop_get_logical_devices(struct hptiop_hba *hba,
-					__le32 *pids, int maxcount)
-{
-	int i;
-	u32 count = maxcount - 1;
-
-	if (__hpt_do_ioctl(hba, HPT_IOCTL_GET_LOGICAL_DEVICES,
-			&count, sizeof(u32),
-			pids, sizeof(u32) * maxcount))
-		return -1;
-
-	maxcount = le32_to_cpu(pids[0]);
-	for (i = 0; i < maxcount; i++)
-		pids[i] = pids[i+1];
-
-	return maxcount;
-}
-
-static int hptiop_get_device_info_v3(struct hptiop_hba *hba, __le32 id,
-				struct hpt_logical_device_info_v3 *pinfo)
-{
-	return __hpt_do_ioctl(hba, HPT_IOCTL_GET_DEVICE_INFO_V3,
-				&id, sizeof(u32),
-				pinfo, sizeof(*pinfo));
-}
-
-static const char *get_array_status(struct hpt_logical_device_info_v3 *devinfo)
-{
-	static char s[64];
-	u32 flags = le32_to_cpu(devinfo->u.array.flags);
-	u32 trans_prog = le32_to_cpu(devinfo->u.array.transforming_progress);
-	u32 reb_prog = le32_to_cpu(devinfo->u.array.rebuilding_progress);
-
-	if (flags & ARRAY_FLAG_DISABLED)
-		return "Disabled";
-	else if (flags & ARRAY_FLAG_TRANSFORMING)
-		sprintf(s, "Expanding/Migrating %d.%d%%%s%s",
-			trans_prog / 100,
-			trans_prog % 100,
-			(flags & (ARRAY_FLAG_NEEDBUILDING|ARRAY_FLAG_BROKEN))?
-					", Critical" : "",
-			((flags & ARRAY_FLAG_NEEDINITIALIZING) &&
-			 !(flags & ARRAY_FLAG_REBUILDING) &&
-			 !(flags & ARRAY_FLAG_INITIALIZING))?
-					", Unintialized" : "");
-	else if ((flags & ARRAY_FLAG_BROKEN) &&
-				devinfo->u.array.array_type != AT_RAID6)
-		return "Critical";
-	else if (flags & ARRAY_FLAG_REBUILDING)
-		sprintf(s,
-			(flags & ARRAY_FLAG_NEEDINITIALIZING)?
-				"%sBackground initializing %d.%d%%" :
-					"%sRebuilding %d.%d%%",
-			(flags & ARRAY_FLAG_BROKEN)? "Critical, " : "",
-			reb_prog / 100,
-			reb_prog % 100);
-	else if (flags & ARRAY_FLAG_VERIFYING)
-		sprintf(s, "%sVerifying %d.%d%%",
-			(flags & ARRAY_FLAG_BROKEN)? "Critical, " : "",
-			reb_prog / 100,
-			reb_prog % 100);
-	else if (flags & ARRAY_FLAG_INITIALIZING)
-		sprintf(s, "%sForground initializing %d.%d%%",
-			(flags & ARRAY_FLAG_BROKEN)? "Critical, " : "",
-			reb_prog / 100,
-			reb_prog % 100);
-	else if (flags & ARRAY_FLAG_NEEDTRANSFORM)
-		sprintf(s,"%s%s%s", "Need Expanding/Migrating",
-			(flags & ARRAY_FLAG_BROKEN)? "Critical, " : "",
-			((flags & ARRAY_FLAG_NEEDINITIALIZING) &&
-			 !(flags & ARRAY_FLAG_REBUILDING) &&
-			 !(flags & ARRAY_FLAG_INITIALIZING))?
-				", Unintialized" : "");
-	else if (flags & ARRAY_FLAG_NEEDINITIALIZING &&
-		!(flags & ARRAY_FLAG_REBUILDING) &&
-		!(flags & ARRAY_FLAG_INITIALIZING))
-		sprintf(s,"%sUninitialized",
-			(flags & ARRAY_FLAG_BROKEN)? "Critical, " : "");
-	else if ((flags & ARRAY_FLAG_NEEDBUILDING) ||
-			(flags & ARRAY_FLAG_BROKEN))
-		return "Critical";
-	else
-		return "Normal";
-	return s;
-}
-
-static void hptiop_dump_devinfo(struct hptiop_hba *hba,
-			struct hptiop_getinfo *pinfo, __le32 id, int indent)
-{
-	struct hpt_logical_device_info_v3 devinfo;
-	int i;
-	u64 capacity;
-
-	for (i = 0; i < indent; i++)
-		hptiop_copy_info(pinfo, "\t");
-
-	if (hptiop_get_device_info_v3(hba, id, &devinfo)) {
-		hptiop_copy_info(pinfo, "unknown\n");
-		return;
-	}
-
-	switch (devinfo.type) {
-
-	case LDT_DEVICE: {
-		struct hd_driveid *driveid;
-		u32 flags = le32_to_cpu(devinfo.u.device.flags);
-
-		driveid = (struct hd_driveid *)devinfo.u.device.ident;
-		/* model[] is 40 chars long, but we just want 20 chars here */
-		driveid->model[20] = 0;
-
-		if (indent)
-			if (flags & DEVICE_FLAG_DISABLED)
-				hptiop_copy_info(pinfo,"Missing\n");
-			else
-				hptiop_copy_info(pinfo, "CH%d %s\n",
-					devinfo.u.device.path_id + 1,
-					driveid->model);
-		else {
-			capacity = le64_to_cpu(devinfo.capacity) * 512;
-			do_div(capacity, 1000000);
-			hptiop_copy_info(pinfo,
-				"CH%d %s, %lluMB, %s %s%s%s%s\n",
-				devinfo.u.device.path_id + 1,
-				driveid->model,
-				capacity,
-				(flags & DEVICE_FLAG_DISABLED)?
-					"Disabled" : "Normal",
-				devinfo.u.device.read_ahead_enabled?
-						"[RA]" : "",
-				devinfo.u.device.write_cache_enabled?
-						"[WC]" : "",
-				devinfo.u.device.TCQ_enabled?
-						"[TCQ]" : "",
-				devinfo.u.device.NCQ_enabled?
-						"[NCQ]" : ""
-			);
-		}
-		break;
-	}
-
-	case LDT_ARRAY:
-		if (devinfo.target_id != INVALID_TARGET_ID)
-			hptiop_copy_info(pinfo, "[DISK %d_%d] ",
-					devinfo.vbus_id, devinfo.target_id);
-
-		capacity = le64_to_cpu(devinfo.capacity) * 512;
-		do_div(capacity, 1000000);
-		hptiop_copy_info(pinfo, "%s (%s), %lluMB, %s\n",
-			devinfo.u.array.name,
-			devinfo.u.array.array_type==AT_RAID0? "RAID0" :
-				devinfo.u.array.array_type==AT_RAID1? "RAID1" :
-				devinfo.u.array.array_type==AT_RAID5? "RAID5" :
-				devinfo.u.array.array_type==AT_RAID6? "RAID6" :
-				devinfo.u.array.array_type==AT_JBOD? "JBOD" :
-					"unknown",
-			capacity,
-			get_array_status(&devinfo));
-		for (i = 0; i < devinfo.u.array.ndisk; i++) {
-			if (hpt_id_valid(devinfo.u.array.members[i])) {
-				if (cpu_to_le16(1<<i) &
-					devinfo.u.array.critical_members)
-					hptiop_copy_info(pinfo, "\t*");
-				hptiop_dump_devinfo(hba, pinfo,
-					devinfo.u.array.members[i], indent+1);
-			}
-			else
-				hptiop_copy_info(pinfo, "\tMissing\n");
-		}
-		if (id == devinfo.u.array.transform_source) {
-			hptiop_copy_info(pinfo, "\tExpanding/Migrating to:\n");
-			hptiop_dump_devinfo(hba, pinfo,
-				devinfo.u.array.transform_target, indent+1);
-		}
-		break;
-	}
-}
-
 static ssize_t hptiop_show_version(struct class_device *class_dev, char *buf)
 {
 	return snprintf(buf, PAGE_SIZE, "%s\n", driver_ver);
 }
 
-static ssize_t hptiop_cdev_read(struct file *filp, char __user *buf,
-				size_t count, loff_t *ppos)
-{
-	struct hptiop_hba *hba = filp->private_data;
-	struct hptiop_getinfo info;
-	int i, j, ndev;
-	struct hpt_controller_info con_info;
-	struct hpt_channel_info chan_info;
-	__le32 ids[32];
-
-	info.buffer     = buf;
-	info.buflength  = count;
-	info.bufoffset  = ppos ? *ppos : 0;
-	info.filpos     = 0;
-	info.buffillen  = 0;
-
-	if (hptiop_get_controller_info(hba, &con_info))
-		return -EIO;
-
-	for (i = 0; i < con_info.num_buses; i++) {
-		if (hptiop_get_channel_info(hba, i, &chan_info) == 0) {
-			if (hpt_id_valid(chan_info.devices[0]))
-				hptiop_dump_devinfo(hba, &info,
-						chan_info.devices[0], 0);
-			if (hpt_id_valid(chan_info.devices[1]))
-				hptiop_dump_devinfo(hba, &info,
-						chan_info.devices[1], 0);
-		}
-	}
-
-	ndev = hptiop_get_logical_devices(hba, ids,
-					sizeof(ids) / sizeof(ids[0]));
-
-	/*
-	 * if hptiop_get_logical_devices fails, ndev==-1 and it just
-	 * output nothing here
-	 */
-	for (j = 0; j < ndev; j++)
-		hptiop_dump_devinfo(hba, &info, ids[j], 0);
-
-	if (ppos)
-		*ppos += info.buffillen;
-
-	return info.buffillen;
-}
-
-static int hptiop_cdev_ioctl(struct inode *inode,  struct file *file,
-					unsigned int cmd, unsigned long arg)
-{
-	struct hptiop_hba *hba = file->private_data;
-	struct hpt_ioctl_u ioctl_u;
-	struct hpt_ioctl_k ioctl_k;
-	u32 bytes_returned;
-	int err = -EINVAL;
-
-	if (copy_from_user(&ioctl_u,
-		(void __user *)arg, sizeof(struct hpt_ioctl_u)))
-		return -EINVAL;
-
-	if (ioctl_u.magic != HPT_IOCTL_MAGIC)
-		return -EINVAL;
-
-	ioctl_k.ioctl_code = ioctl_u.ioctl_code;
-	ioctl_k.inbuf = NULL;
-	ioctl_k.inbuf_size = ioctl_u.inbuf_size;
-	ioctl_k.outbuf = NULL;
-	ioctl_k.outbuf_size = ioctl_u.outbuf_size;
-	ioctl_k.hba = hba;
-	ioctl_k.bytes_returned = &bytes_returned;
-
-	/* verify user buffer */
-	if ((ioctl_k.inbuf_size && !access_ok(VERIFY_READ,
-			ioctl_u.inbuf, ioctl_k.inbuf_size)) ||
-		(ioctl_k.outbuf_size && !access_ok(VERIFY_WRITE,
-			ioctl_u.outbuf, ioctl_k.outbuf_size)) ||
-		(ioctl_u.bytes_returned && !access_ok(VERIFY_WRITE,
-			ioctl_u.bytes_returned, sizeof(u32))) ||
-		ioctl_k.inbuf_size + ioctl_k.outbuf_size > 0x10000) {
-
-		dprintk("scsi%d: got bad user address\n", hba->host->host_no);
-		return -EINVAL;
-	}
-
-	/* map buffer to kernel. */
-	if (ioctl_k.inbuf_size) {
-		ioctl_k.inbuf = kmalloc(ioctl_k.inbuf_size, GFP_KERNEL);
-		if (!ioctl_k.inbuf) {
-			dprintk("scsi%d: fail to alloc inbuf\n",
-					hba->host->host_no);
-			err = -ENOMEM;
-			goto err_exit;
-		}
-
-		if (copy_from_user(ioctl_k.inbuf,
-				ioctl_u.inbuf, ioctl_k.inbuf_size)) {
-			goto err_exit;
-		}
-	}
-
-	if (ioctl_k.outbuf_size) {
-		ioctl_k.outbuf = kmalloc(ioctl_k.outbuf_size, GFP_KERNEL);
-		if (!ioctl_k.outbuf) {
-			dprintk("scsi%d: fail to alloc outbuf\n",
-					hba->host->host_no);
-			err = -ENOMEM;
-			goto err_exit;
-		}
-	}
-
-	hptiop_do_ioctl(&ioctl_k);
-
-	if (ioctl_k.result == HPT_IOCTL_RESULT_OK) {
-		if (ioctl_k.outbuf_size &&
-			copy_to_user(ioctl_u.outbuf,
-				ioctl_k.outbuf, ioctl_k.outbuf_size))
-			goto err_exit;
-
-		if (ioctl_u.bytes_returned &&
-			copy_to_user(ioctl_u.bytes_returned,
-				&bytes_returned, sizeof(u32)))
-			goto err_exit;
-
-		err = 0;
-	}
-
-err_exit:
-	kfree(ioctl_k.inbuf);
-	kfree(ioctl_k.outbuf);
-
-	return err;
-}
-
-static int hptiop_cdev_open(struct inode *inode, struct file *file)
-{
-	struct hptiop_hba *hba;
-	unsigned i = 0, minor = iminor(inode);
-	int ret = -ENODEV;
-
-	spin_lock(&hptiop_hba_list_lock);
-	list_for_each_entry(hba, &hptiop_hba_list, link) {
-		if (i == minor) {
-			file->private_data = hba;
-			ret = 0;
-			goto out;
-		}
-		i++;
-	}
-
-out:
-	spin_unlock(&hptiop_hba_list_lock);
-	return ret;
-}
-
-static struct file_operations hptiop_cdev_fops = {
-	.owner = THIS_MODULE,
-	.read  = hptiop_cdev_read,
-	.ioctl = hptiop_cdev_ioctl,
-	.open  = hptiop_cdev_open,
-};
-
 static ssize_t hptiop_show_fw_version(struct class_device *class_dev, char *buf)
 {
 	struct Scsi_Host *host = class_to_shost(class_dev);
@@ -1296,19 +771,13 @@ static int __devinit hptiop_probe(struct pci_dev *pcidev,
 		goto unmap_pci_bar;
 	}
 
-	if (scsi_add_host(host, &pcidev->dev)) {
-		printk(KERN_ERR "scsi%d: scsi_add_host failed\n",
-					hba->host->host_no);
-		goto unmap_pci_bar;
-	}
-
 	pci_set_drvdata(pcidev, host);
 
 	if (request_irq(pcidev->irq, hptiop_intr, IRQF_SHARED,
 					driver_name, hba)) {
 		printk(KERN_ERR "scsi%d: request irq %d failed\n",
 					hba->host->host_no, pcidev->irq);
-		goto remove_scsi_host;
+		goto unmap_pci_bar;
 	}
 
 	/* Allocate request mem */
@@ -1355,9 +824,12 @@ static int __devinit hptiop_probe(struct pci_dev *pcidev,
 	if (hptiop_initialize_iop(hba))
 		goto free_request_mem;
 
-	spin_lock(&hptiop_hba_list_lock);
-	list_add_tail(&hba->link, &hptiop_hba_list);
-	spin_unlock(&hptiop_hba_list_lock);
+	if (scsi_add_host(host, &pcidev->dev)) {
+		printk(KERN_ERR "scsi%d: scsi_add_host failed\n",
+					hba->host->host_no);
+		goto free_request_mem;
+	}
+
 
 	scsi_scan_host(host);
 
@@ -1372,9 +844,6 @@ free_request_mem:
 free_request_irq:
 	free_irq(hba->pcidev->irq, hba);
 
-remove_scsi_host:
-	scsi_remove_host(host);
-
 unmap_pci_bar:
 	iounmap(hba->iop);
 
@@ -1422,10 +891,6 @@ static void hptiop_remove(struct pci_dev *pcidev)
 
 	scsi_remove_host(host);
 
-	spin_lock(&hptiop_hba_list_lock);
-	list_del_init(&hba->link);
-	spin_unlock(&hptiop_hba_list_lock);
-
 	hptiop_shutdown(pcidev);
 
 	free_irq(hba->pcidev->irq, hba);
@@ -1462,27 +927,12 @@ static struct pci_driver hptiop_pci_driver = {
 
 static int __init hptiop_module_init(void)
 {
-	int error;
-
 	printk(KERN_INFO "%s %s\n", driver_name_long, driver_ver);
-
-	error = pci_register_driver(&hptiop_pci_driver);
-	if (error < 0)
-		return error;
-
-	hptiop_cdev_major = register_chrdev(0, "hptiop", &hptiop_cdev_fops);
-	if (hptiop_cdev_major < 0) {
-		printk(KERN_WARNING "unable to register hptiop device.\n");
-		return hptiop_cdev_major;
-	}
-
-	return 0;
+	return pci_register_driver(&hptiop_pci_driver);
 }
 
 static void __exit hptiop_module_exit(void)
 {
-	dprintk("hptiop_module_exit\n");
-	unregister_chrdev(hptiop_cdev_major, "hptiop");
 	pci_unregister_driver(&hptiop_pci_driver);
 }
 
-- 
cgit v0.10.2


From 77d88ee275aeba5da447987f30401bbd4c901ca9 Mon Sep 17 00:00:00 2001
From: Michael Reed <mdr@sgi.com>
Date: Mon, 31 Jul 2006 12:19:40 -0500
Subject: [SCSI] mptfc: properly wait for firmware target discovery to complete

Based upon a conversation I had with LSI's fibre channel firmware guru,
this patch adds another condition under which the driver waits for the
firmware link initialization / target discovery to complete.

Signed-off-by: Michael Reed <mdr@sgi.com>
Acked-by: Moore, Eric <Eric.Moore@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c
index 90da7d6..2144554 100644
--- a/drivers/message/fusion/mptfc.c
+++ b/drivers/message/fusion/mptfc.c
@@ -669,7 +669,10 @@ mptfc_GetFcPortPage0(MPT_ADAPTER *ioc, int portnum)
 			 * if still doing discovery,
 			 * hang loose a while until finished
 			 */
-			if (pp0dest->PortState == MPI_FCPORTPAGE0_PORTSTATE_UNKNOWN) {
+			if ((pp0dest->PortState == MPI_FCPORTPAGE0_PORTSTATE_UNKNOWN) ||
+			    (pp0dest->PortState == MPI_FCPORTPAGE0_PORTSTATE_ONLINE &&
+			     (pp0dest->Flags & MPI_FCPORTPAGE0_FLAGS_ATTACH_TYPE_MASK)
+			      == MPI_FCPORTPAGE0_FLAGS_ATTACH_NO_INIT)) {
 				if (count-- > 0) {
 					msleep(100);
 					goto try_again;
-- 
cgit v0.10.2


From 3a0c56d801df6785b30e36c19e89d7e971c151da Mon Sep 17 00:00:00 2001
From: Michael Reed <mdr@sgi.com>
Date: Mon, 31 Jul 2006 12:19:50 -0500
Subject: [SCSI] mptfc: correct out of order event processing

This patch corrects a problem in mptfc which can result in targets
being removed after executing an "lsiutil 99" reset of the fibre
channel ports.

The last rescan event was being processed before the setup reset work
due to an inappropriate optimization in the event processing logic.
Every rescan event is now queued for execution and the setup reset
work now executes in the proper sequence.

Signed-off-by: Michael Reed <mdr@sgi.com>
Acked-by: Moore, Eric <Eric.Moore@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index d4cb144..c537d71 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -640,7 +640,6 @@ typedef struct _MPT_ADAPTER
 	struct work_struct	 fc_setup_reset_work;
 	struct list_head	 fc_rports;
 	spinlock_t		 fc_rescan_work_lock;
-	int			 fc_rescan_work_count;
 	struct work_struct	 fc_rescan_work;
 	char			 fc_rescan_work_q_name[KOBJ_NAME_LEN];
 	struct workqueue_struct *fc_rescan_work_q;
diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c
index 2144554..85696f3 100644
--- a/drivers/message/fusion/mptfc.c
+++ b/drivers/message/fusion/mptfc.c
@@ -898,59 +898,45 @@ mptfc_rescan_devices(void *arg)
 {
 	MPT_ADAPTER		*ioc = (MPT_ADAPTER *)arg;
 	int			ii;
-	int			work_to_do;
 	u64			pn;
-	unsigned long		flags;
 	struct mptfc_rport_info *ri;
 
-	do {
-		/* start by tagging all ports as missing */
-		list_for_each_entry(ri, &ioc->fc_rports, list) {
-			if (ri->flags & MPT_RPORT_INFO_FLAGS_REGISTERED) {
-				ri->flags |= MPT_RPORT_INFO_FLAGS_MISSING;
-			}
+	/* start by tagging all ports as missing */
+	list_for_each_entry(ri, &ioc->fc_rports, list) {
+		if (ri->flags & MPT_RPORT_INFO_FLAGS_REGISTERED) {
+			ri->flags |= MPT_RPORT_INFO_FLAGS_MISSING;
 		}
+	}
 
-		/*
-		 * now rescan devices known to adapter,
-		 * will reregister existing rports
-		 */
-		for (ii=0; ii < ioc->facts.NumberOfPorts; ii++) {
-			(void) mptfc_GetFcPortPage0(ioc, ii);
-			mptfc_init_host_attr(ioc,ii);	/* refresh */
-			mptfc_GetFcDevPage0(ioc,ii,mptfc_register_dev);
-		}
+	/*
+	 * now rescan devices known to adapter,
+	 * will reregister existing rports
+	 */
+	for (ii=0; ii < ioc->facts.NumberOfPorts; ii++) {
+		(void) mptfc_GetFcPortPage0(ioc, ii);
+		mptfc_init_host_attr(ioc, ii);	/* refresh */
+		mptfc_GetFcDevPage0(ioc, ii, mptfc_register_dev);
+	}
 
-		/* delete devices still missing */
-		list_for_each_entry(ri, &ioc->fc_rports, list) {
-			/* if newly missing, delete it */
-			if (ri->flags & MPT_RPORT_INFO_FLAGS_MISSING) {
+	/* delete devices still missing */
+	list_for_each_entry(ri, &ioc->fc_rports, list) {
+		/* if newly missing, delete it */
+		if (ri->flags & MPT_RPORT_INFO_FLAGS_MISSING) {
 
-				ri->flags &= ~(MPT_RPORT_INFO_FLAGS_REGISTERED|
-					       MPT_RPORT_INFO_FLAGS_MISSING);
-				fc_remote_port_delete(ri->rport);	/* won't sleep */
-				ri->rport = NULL;
+			ri->flags &= ~(MPT_RPORT_INFO_FLAGS_REGISTERED|
+				       MPT_RPORT_INFO_FLAGS_MISSING);
+			fc_remote_port_delete(ri->rport);	/* won't sleep */
+			ri->rport = NULL;
 
-				pn = (u64)ri->pg0.WWPN.High << 32 |
-				     (u64)ri->pg0.WWPN.Low;
-				dfcprintk ((MYIOC_s_INFO_FMT
-					"mptfc_rescan.%d: %llx deleted\n",
-					ioc->name,
-					ioc->sh->host_no,
-					(unsigned long long)pn));
-			}
+			pn = (u64)ri->pg0.WWPN.High << 32 |
+			     (u64)ri->pg0.WWPN.Low;
+			dfcprintk ((MYIOC_s_INFO_FMT
+				"mptfc_rescan.%d: %llx deleted\n",
+				ioc->name,
+				ioc->sh->host_no,
+				(unsigned long long)pn));
 		}
-
-		/*
-		 * allow multiple passes as target state
-		 * might have changed during scan
-		 */
-		spin_lock_irqsave(&ioc->fc_rescan_work_lock, flags);
-		if (ioc->fc_rescan_work_count > 2) 	/* only need one more */
-			ioc->fc_rescan_work_count = 2;
-		work_to_do = --ioc->fc_rescan_work_count;
-		spin_unlock_irqrestore(&ioc->fc_rescan_work_lock, flags);
-	} while (work_to_do);
+	}
 }
 
 static int
@@ -1162,7 +1148,6 @@ mptfc_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 *	by doing it via the workqueue, some locking is eliminated
 	 */
 
-	ioc->fc_rescan_work_count = 1;
 	queue_work(ioc->fc_rescan_work_q, &ioc->fc_rescan_work);
 	flush_workqueue(ioc->fc_rescan_work_q);
 
@@ -1205,10 +1190,8 @@ mptfc_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply)
 	case MPI_EVENT_RESCAN:
 		spin_lock_irqsave(&ioc->fc_rescan_work_lock, flags);
 		if (ioc->fc_rescan_work_q) {
-			if (ioc->fc_rescan_work_count++ == 0) {
-				queue_work(ioc->fc_rescan_work_q,
-					   &ioc->fc_rescan_work);
-			}
+			queue_work(ioc->fc_rescan_work_q,
+				   &ioc->fc_rescan_work);
 		}
 		spin_unlock_irqrestore(&ioc->fc_rescan_work_lock, flags);
 		break;
@@ -1251,10 +1234,8 @@ mptfc_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 		mptfc_SetFcPortPage1_defaults(ioc);
 		spin_lock_irqsave(&ioc->fc_rescan_work_lock, flags);
 		if (ioc->fc_rescan_work_q) {
-			if (ioc->fc_rescan_work_count++ == 0) {
-				queue_work(ioc->fc_rescan_work_q,
-					   &ioc->fc_rescan_work);
-			}
+			queue_work(ioc->fc_rescan_work_q,
+				   &ioc->fc_rescan_work);
 		}
 		spin_unlock_irqrestore(&ioc->fc_rescan_work_lock, flags);
 	}
-- 
cgit v0.10.2


From 51704c609fcf256dacfcfae3622eb6ef53ac5b48 Mon Sep 17 00:00:00 2001
From: Albert Lee <albertcc@tw.ibm.com>
Date: Wed, 9 Aug 2006 18:36:22 +0800
Subject: [PATCH] libata: Use ATA_FLAG_PIO_POLLING for pdc_adma

pdc_adma was overlooked and broken by the irq-pio patch:
Only HSM_ST_LAST interrupts should be delivered to this LLDD.

Adding ATA_FLAG_PIO_POLLING to pdc_adma fixes the problem (temporarily),
before we convert the irq handler of pdc_adma to handle all interrupts.

Signed-off-by: Albert Lee <albertcc@tw.ibm.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>

diff --git a/drivers/scsi/pdc_adma.c b/drivers/scsi/pdc_adma.c
index d1f38c3..efc8fff 100644
--- a/drivers/scsi/pdc_adma.c
+++ b/drivers/scsi/pdc_adma.c
@@ -183,7 +183,8 @@ static struct ata_port_info adma_port_info[] = {
 	{
 		.sht		= &adma_ata_sht,
 		.host_flags	= ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST |
-				  ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO,
+				  ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO |
+				  ATA_FLAG_PIO_POLLING,
 		.pio_mask	= 0x10, /* pio4 */
 		.udma_mask	= 0x1f, /* udma0-4 */
 		.port_ops	= &adma_ata_ops,
-- 
cgit v0.10.2


From a34b6fc04d58ad72fe0cc74cd448f4551bd2ebaf Mon Sep 17 00:00:00 2001
From: Martin Hicks <mort@bork.org>
Date: Wed, 5 Jul 2006 15:06:13 -0400
Subject: [PATCH] libata: PHY reset requires writing 0x4 to SControl

Hi,

Reading the Intel VSC and AHCI it seems like writing 0x302 is incorrect.
The only valid values are 4, 1 and 0.  Writing 4 disables the
PHY.

Signed-off-by: Martin Hicks <mort@bork.org>
Signed-off-by: Jeff Garzik <jeff@garzik.org>

diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 16fc2dd..73dd6c8 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -2746,7 +2746,7 @@ int sata_std_hardreset(struct ata_port *ap, unsigned int *class)
 		if ((rc = sata_scr_read(ap, SCR_CONTROL, &scontrol)))
 			return rc;
 
-		scontrol = (scontrol & 0x0f0) | 0x302;
+		scontrol = (scontrol & 0x0f0) | 0x304;
 
 		if ((rc = sata_scr_write(ap, SCR_CONTROL, scontrol)))
 			return rc;
-- 
cgit v0.10.2


From e54b82d739d4a2ef992976c8c0692cdf89286420 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@mellanox.co.il>
Date: Thu, 10 Aug 2006 10:46:56 -0700
Subject: IB/mthca: Make fence flag work for send work requests

The fence bit needs to be set in the doorbell too, not just the WQE.

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index cd8b672..157b4f8 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -99,6 +99,10 @@ enum {
 	MTHCA_QP_BIT_RSC = 1 <<  3
 };
 
+enum {
+	MTHCA_SEND_DOORBELL_FENCE = 1 << 5
+};
+
 struct mthca_qp_path {
 	__be32 port_pkey;
 	u8     rnr_retry;
@@ -1502,7 +1506,7 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	int i;
 	int size;
 	int size0 = 0;
-	u32 f0 = 0;
+	u32 f0;
 	int ind;
 	u8 op0 = 0;
 
@@ -1686,6 +1690,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		if (!size0) {
 			size0 = size;
 			op0   = mthca_opcode[wr->opcode];
+			f0    = wr->send_flags & IB_SEND_FENCE ?
+				MTHCA_SEND_DOORBELL_FENCE : 0;
 		}
 
 		++ind;
@@ -1843,7 +1849,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	int i;
 	int size;
 	int size0 = 0;
-	u32 f0 = 0;
+	u32 f0;
 	int ind;
 	u8 op0 = 0;
 
@@ -2051,6 +2057,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		if (!size0) {
 			size0 = size;
 			op0   = mthca_opcode[wr->opcode];
+			f0    = wr->send_flags & IB_SEND_FENCE ?
+				MTHCA_SEND_DOORBELL_FENCE : 0;
 		}
 
 		++ind;
-- 
cgit v0.10.2


From a19aa5c5fdda8b556ab238177ee27c5ef7873c94 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Fri, 11 Aug 2006 08:56:57 -0700
Subject: IB/mthca: Fix potential AB-BA deadlock with CQ locks

When destroying a QP, mthca locks both the QP's send CQ and receive
CQ.  However, the following scenario is perfectly valid:

    QP_a: send_cq == CQ_x, recv_cq == CQ_y
    QP_b: send_cq == CQ_y, recv_cq == CQ_x

The old mthca code simply locked send_cq and then recv_cq, which in
this case could lead to an AB-BA deadlock if QP_a and QP_b were
destroyed simultaneously.

We can fix this by changing the locking code to lock the CQ with the
lower CQ number first, which will create a consistent lock ordering.
Also, the second CQ is locked with spin_lock_nested() to tell lockdep
that we know what we're doing with the lock nesting.

This bug was found by lockdep.

Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 8de2887..9a5bece 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -136,8 +136,8 @@ struct mthca_ah {
  * We have one global lock that protects dev->cq/qp_table.  Each
  * struct mthca_cq/qp also has its own lock.  An individual qp lock
  * may be taken inside of an individual cq lock.  Both cqs attached to
- * a qp may be locked, with the send cq locked first.  No other
- * nesting should be done.
+ * a qp may be locked, with the cq with the lower cqn locked first.
+ * No other nesting should be done.
  *
  * Each struct mthca_cq/qp also has an ref count, protected by the
  * corresponding table lock.  The pointer from the cq/qp_table to the
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 157b4f8..2e8f6f3 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1263,6 +1263,32 @@ int mthca_alloc_qp(struct mthca_dev *dev,
 	return 0;
 }
 
+static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+{
+	if (send_cq == recv_cq)
+		spin_lock_irq(&send_cq->lock);
+	else if (send_cq->cqn < recv_cq->cqn) {
+		spin_lock_irq(&send_cq->lock);
+		spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
+	} else {
+		spin_lock_irq(&recv_cq->lock);
+		spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
+	}
+}
+
+static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+{
+	if (send_cq == recv_cq)
+		spin_unlock_irq(&send_cq->lock);
+	else if (send_cq->cqn < recv_cq->cqn) {
+		spin_unlock(&recv_cq->lock);
+		spin_unlock_irq(&send_cq->lock);
+	} else {
+		spin_unlock(&send_cq->lock);
+		spin_unlock_irq(&recv_cq->lock);
+	}
+}
+
 int mthca_alloc_sqp(struct mthca_dev *dev,
 		    struct mthca_pd *pd,
 		    struct mthca_cq *send_cq,
@@ -1315,17 +1341,13 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
 	 * Lock CQs here, so that CQ polling code can do QP lookup
 	 * without taking a lock.
 	 */
-	spin_lock_irq(&send_cq->lock);
-	if (send_cq != recv_cq)
-		spin_lock(&recv_cq->lock);
+	mthca_lock_cqs(send_cq, recv_cq);
 
 	spin_lock(&dev->qp_table.lock);
 	mthca_array_clear(&dev->qp_table.qp, mqpn);
 	spin_unlock(&dev->qp_table.lock);
 
-	if (send_cq != recv_cq)
-		spin_unlock(&recv_cq->lock);
-	spin_unlock_irq(&send_cq->lock);
+	mthca_unlock_cqs(send_cq, recv_cq);
 
  err_out:
 	dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
@@ -1359,9 +1381,7 @@ void mthca_free_qp(struct mthca_dev *dev,
 	 * Lock CQs here, so that CQ polling code can do QP lookup
 	 * without taking a lock.
 	 */
-	spin_lock_irq(&send_cq->lock);
-	if (send_cq != recv_cq)
-		spin_lock(&recv_cq->lock);
+	mthca_lock_cqs(send_cq, recv_cq);
 
 	spin_lock(&dev->qp_table.lock);
 	mthca_array_clear(&dev->qp_table.qp,
@@ -1369,9 +1389,7 @@ void mthca_free_qp(struct mthca_dev *dev,
 	--qp->refcount;
 	spin_unlock(&dev->qp_table.lock);
 
-	if (send_cq != recv_cq)
-		spin_unlock(&recv_cq->lock);
-	spin_unlock_irq(&send_cq->lock);
+	mthca_unlock_cqs(send_cq, recv_cq);
 
 	wait_event(qp->wait, !get_qp_refcount(dev, qp));
 
-- 
cgit v0.10.2


From 0ee6a17389ceef65f1a86c38872fa98f08489022 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 11 Aug 2006 08:30:31 +0200
Subject: ACPI: fix kfree in i2c_ec error path

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/i2c_ec.c b/drivers/acpi/i2c_ec.c
index 84239d5..6809c28 100644
--- a/drivers/acpi/i2c_ec.c
+++ b/drivers/acpi/i2c_ec.c
@@ -330,7 +330,7 @@ static int acpi_ec_hc_add(struct acpi_device *device)
 	status = acpi_evaluate_integer(ec_hc->handle, "_EC", NULL, &val);
 	if (ACPI_FAILURE(status)) {
 		ACPI_DEBUG_PRINT((ACPI_DB_WARN, "Error obtaining _EC\n"));
-		kfree(ec_hc->smbus);
+		kfree(ec_hc);
 		kfree(smbus);
 		return -EIO;
 	}
-- 
cgit v0.10.2


From a0c5a64552e3c57d7f9eb593c6ce21a285ac86b4 Mon Sep 17 00:00:00 2001
From: Yoav Steinberg <yoav@monfort.co.il>
Date: Sun, 13 Aug 2006 14:17:12 +0100
Subject: [ARM] 3752/1: fix versatile flash resource map

Patch from Yoav Steinberg

The flash resource mapping for the Versatile machine included one extra byte in the end address. This results in a failure to map other resources at the physical address directly after the NOR flash.

Signed-off-by: Yoav Steinberg <yoav@monfort.co.il>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index c4e3f8c..f2bbef0 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -285,7 +285,7 @@ static struct flash_platform_data versatile_flash_data = {
 
 static struct resource versatile_flash_resource = {
 	.start			= VERSATILE_FLASH_BASE,
-	.end			= VERSATILE_FLASH_BASE + VERSATILE_FLASH_SIZE,
+	.end			= VERSATILE_FLASH_BASE + VERSATILE_FLASH_SIZE - 1,
 	.flags			= IORESOURCE_MEM,
 };
 
-- 
cgit v0.10.2


From 4d8316d5ea4dcf0bf15d8a06d539ed7c99e9cfbe Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Mon, 14 Aug 2006 22:37:22 -0700
Subject: ACPI: fix boot with acpi=off

Fix acpi_ac/battery boot with acpi=off

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index 96309b9..11abc7b 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -285,6 +285,8 @@ static int __init acpi_ac_init(void)
 {
 	int result;
 
+	if (acpi_disabled)
+		return -ENODEV;
 
 	acpi_ac_dir = acpi_lock_ac_dir();
 	if (!acpi_ac_dir)
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 6e52217..9810e2a 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -757,6 +757,9 @@ static int __init acpi_battery_init(void)
 {
 	int result;
 
+	if (acpi_disabled)
+		return -ENODEV;
+
 	acpi_battery_dir = acpi_lock_battery_dir();
 	if (!acpi_battery_dir)
 		return -ENODEV;
-- 
cgit v0.10.2


From b20d2aeb0ad322cbe7fd9120acae6118231b17a3 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Tue, 15 Aug 2006 23:21:37 -0400
Subject: ACPI: skip smart battery init when acpi=off

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index db7b350..62bef0b 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -1714,6 +1714,9 @@ static int __init acpi_sbs_init(void)
 {
 	int result = 0;
 
+	if (acpi_disabled)
+		return -ENODEV;
+
 	init_MUTEX(&sbs_sem);
 
 	if (capacity_mode != DEF_CAPACITY_UNIT
-- 
cgit v0.10.2


From 7daef60721e03809c7e5f8aa8491df4190f6b56f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Mon, 14 Aug 2006 22:37:24 -0700
Subject: ACPI: add message if firmware_register() init fails

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index b297769..7b77ee1 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
+#include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/sched.h>
 #include <linux/pm.h>
@@ -738,7 +739,10 @@ static int __init acpi_init(void)
 		return -ENODEV;
 	}
 
-	firmware_register(&acpi_subsys);
+	result = firmware_register(&acpi_subsys);
+	if (result < 0)
+		printk(KERN_WARNING "%s: firmware_register error: %d\n",
+			__FUNCTION__, result);
 
 	result = acpi_bus_init();
 
-- 
cgit v0.10.2


From e9a315bcae3b9e0c54fb68ef90d0095956314480 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Mon, 14 Aug 2006 22:37:24 -0700
Subject: ACPI: verbose on kset/kobject_register errors

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 5fcb50c..698a154 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -4,6 +4,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/acpi.h>
 
 #include <acpi/acpi_drivers.h>
@@ -113,6 +114,8 @@ static struct kset acpi_namespace_kset = {
 static void acpi_device_register(struct acpi_device *device,
 				 struct acpi_device *parent)
 {
+	int err;
+
 	/*
 	 * Linkage
 	 * -------
@@ -138,7 +141,10 @@ static void acpi_device_register(struct acpi_device *device,
 		device->kobj.parent = &parent->kobj;
 	device->kobj.ktype = &ktype_acpi_ns;
 	device->kobj.kset = &acpi_namespace_kset;
-	kobject_register(&device->kobj);
+	err = kobject_register(&device->kobj);
+	if (err < 0)
+		printk(KERN_WARNING "%s: kobject_register error: %d\n",
+			__FUNCTION__, err);
 	create_sysfs_device_files(device);
 }
 
@@ -1450,7 +1456,9 @@ static int __init acpi_scan_init(void)
 	if (acpi_disabled)
 		return 0;
 
-	kset_register(&acpi_namespace_kset);
+	result = kset_register(&acpi_namespace_kset);
+	if (result < 0)
+		printk(KERN_ERR PREFIX "kset_register error: %d\n", result);
 
 	result = bus_register(&acpi_bus_type);
 	if (result) {
-- 
cgit v0.10.2


From 07dd4855e7fffeb50565826e5e736509ee8f6129 Mon Sep 17 00:00:00 2001
From: Yasunori Goto <y-goto@jp.fujitsu.com>
Date: Mon, 14 Aug 2006 22:37:32 -0700
Subject: ACPI: memory hotplug: remove useless message at boot time

This removes a noisy, useless message at boot.  The message is a ton of
"ACPI Exception (acpi_memory-0492): AE_ERROR, handle is no memory device"

In my emulation, the number of memory devices is not large (only 6), but
this message is displayed 114 times.

It is shown by acpi_memory_register_notify_handler(), which is called by
acpi_walk_namespace().

acpi_walk_namespace() parses all of ACPI's namespace and executes
acpi_memory_register_notify_handler().  So, it is called for every
device that is defined in the namespace.  If the device being parsed is not
memory, acpi_memhotplug ignores it due to "no match" and parses the next
device.  This is the normal route, not an exception.

Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index b0d4b14..1dda370 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -484,10 +484,8 @@ acpi_memory_register_notify_handler(acpi_handle handle,
 
 
 	status = is_memory_device(handle);
-	if (ACPI_FAILURE(status)){
-		ACPI_EXCEPTION((AE_INFO, status, "handle is no memory device"));
+	if (ACPI_FAILURE(status))
 		return AE_OK;	/* continue */
-	}
 
 	status = acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY,
 					     acpi_memory_device_notify, NULL);
@@ -503,10 +501,8 @@ acpi_memory_deregister_notify_handler(acpi_handle handle,
 
 
 	status = is_memory_device(handle);
-	if (ACPI_FAILURE(status)){
-		ACPI_EXCEPTION((AE_INFO, status, "handle is no memory device"));
+	if (ACPI_FAILURE(status))
 		return AE_OK;	/* continue */
-	}
 
 	status = acpi_remove_notify_handler(handle,
 					    ACPI_SYSTEM_NOTIFY,
-- 
cgit v0.10.2


From b5240b32b9b2b75917c478d768191862a2b190cc Mon Sep 17 00:00:00 2001
From: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
Date: Wed, 26 Jul 2006 13:32:00 -0400
Subject: ACPIPHP: allow acpiphp to build without ACPI_DOCK

Change the build options for acpiphp so that it may build without being
dependent on the ACPI_DOCK option, but yet does not allow the option of
acpiphp being built-in when dock is built as a module.
This does not change the previous patch for ACPI_IBM_DOCK Kconfig.

For the following matrix of config options, I built an i386 kernel.

Dock		acpiphp		should it build?	confirmed
y		y		y			y
y		n		y			y
y		m		y			y
m		y		no - acpiphp should	acpiphp was
				     convert to m	converted to m
m		n		y			y
m		m		y			y
n		y		y			y
n		n		y			y
n		m		y			y


Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
index 3fae77f..1c363d8 100644
--- a/drivers/pci/hotplug/Kconfig
+++ b/drivers/pci/hotplug/Kconfig
@@ -76,7 +76,7 @@ config HOTPLUG_PCI_IBM
 
 config HOTPLUG_PCI_ACPI
 	tristate "ACPI PCI Hotplug driver"
-	depends on ACPI_DOCK && HOTPLUG_PCI
+	depends on (!ACPI_DOCK && ACPI && HOTPLUG_PCI) || (ACPI_DOCK && HOTPLUG_PCI)
 	help
 	  Say Y here if you have a system that supports PCI Hotplug using
 	  ACPI.
-- 
cgit v0.10.2


From acaea9ee460d0ba5a14f0066ba26cfa43dd5fdf3 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@mellanox.co.il>
Date: Tue, 15 Aug 2006 17:20:50 +0300
Subject: IB/core: Fix SM LID/LID change with client reregister set

After commit 12bbb2b7be7f5564952ebe0196623e97464b8ac5, when an SM LID
change or LID change MAD also has the client reregistration bit set,
only a CLIENT_REREGISTER event is generated.

As a result, the sa_query module and the cache module don't update the
port information, and ULPs (e.g. IPoIB) stop working.  This is the
regression we observe as compared to 2.6.17.

Rather than generate multiple events (which would have negative
performance impact), let us simply let cache and SA query respond to
reregister event in the same way as to LID and SM change events.

Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index e05ca2c..75313ad 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -301,7 +301,8 @@ static void ib_cache_event(struct ib_event_handler *handler,
 	    event->event == IB_EVENT_PORT_ACTIVE ||
 	    event->event == IB_EVENT_LID_CHANGE  ||
 	    event->event == IB_EVENT_PKEY_CHANGE ||
-	    event->event == IB_EVENT_SM_CHANGE) {
+	    event->event == IB_EVENT_SM_CHANGE   ||
+	    event->event == IB_EVENT_CLIENT_REREGISTER) {
 		work = kmalloc(sizeof *work, GFP_ATOMIC);
 		if (work) {
 			INIT_WORK(&work->work, ib_cache_task, work);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index aeda484..d6b8422 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -405,7 +405,8 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
 	    event->event == IB_EVENT_PORT_ACTIVE ||
 	    event->event == IB_EVENT_LID_CHANGE  ||
 	    event->event == IB_EVENT_PKEY_CHANGE ||
-	    event->event == IB_EVENT_SM_CHANGE) {
+	    event->event == IB_EVENT_SM_CHANGE   ||
+	    event->event == IB_EVENT_CLIENT_REREGISTER) {
 		struct ib_sa_device *sa_dev;
 		sa_dev = container_of(handler, typeof(*sa_dev), event_handler);
 
-- 
cgit v0.10.2


From 4e6e6504a4572dee3afcb0925ce92ad559e1e0db Mon Sep 17 00:00:00 2001
From: William Morrrow <william.morrow@amd.com>
Date: Mon, 14 Aug 2006 22:37:31 -0700
Subject: ACPI: Handle BIOS that resumes from S3 to suspend routine rather than
 resume vector

A BIOS has been found that resumes from S3 to the routine that invoked suspend,
ignoring the resume vector.  This appears to the OS as a failed S3 attempt.

This same system suspends and resumes properly with Windows.

It is possible to invoke the protected mode register restore routine (which
would normally restore the sysenter registers) when the BIOS returns from
S3.  This has no effect on a correctly running system and repairs the
damage from the deviant BIOS.

Signed-off-by: William Morrow <william.morrow@amd.com>
Signed-off-by: Jordan Crouse <jordan.crouse@amd.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S
index 9f408ee..b781b38 100644
--- a/arch/i386/kernel/acpi/wakeup.S
+++ b/arch/i386/kernel/acpi/wakeup.S
@@ -292,7 +292,10 @@ ENTRY(do_suspend_lowlevel)
 	pushl	$3
 	call	acpi_enter_sleep_state
 	addl	$4, %esp
-	ret
+
+#	In case of S3 failure, we'll emerge here.  Jump
+# 	to ret_point to recover
+	jmp	ret_point
 	.p2align 4,,7
 ret_point:
 	call	restore_registers
-- 
cgit v0.10.2


From 5672bde6355f2d12c49df1eec083d25afe489063 Mon Sep 17 00:00:00 2001
From: Handle X <xhandle@gmail.com>
Date: Mon, 14 Aug 2006 22:37:27 -0700
Subject: ACPI: hotkey.c fixes, fix for potential crash of hotkey.c

While going through the code, I found some memory leaks and potential
crashes in drivers/acpi/hotkey.c.  This patch fixes them.

This patch does the following,

1. Fixes memory leaks in error paths of hotkey_write_config

2. Fixes freeing unallocated pointers in the error paths of hotkey_write_config

3. Uses a loop instead of linear searching for parsing the userspace
   input in get_params

4. Uses array of char * instead of passing 4 pointer parameters
   explicitly into the init_{poll_}hotkey_* static functions

Signed-off-by: Andrew Morton <akpm@osdl.org>
Acked-by: Luming Yu <luming.yu@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/hotkey.c b/drivers/acpi/hotkey.c
index 32c9d88..1ba2db6 100644
--- a/drivers/acpi/hotkey.c
+++ b/drivers/acpi/hotkey.c
@@ -91,6 +91,14 @@ enum {
 	HK_EVENT_ENTERRING_S5,
 };
 
+enum conf_entry_enum {
+	bus_handle = 0,
+	bus_method = 1,
+	action_handle = 2,
+	method = 3,
+	LAST_CONF_ENTRY
+};
+
 /*  procdir we use */
 static struct proc_dir_entry *hotkey_proc_dir;
 static struct proc_dir_entry *hotkey_config;
@@ -244,19 +252,15 @@ static int hotkey_info_open_fs(struct inode *inode, struct file *file)
 
 static char *format_result(union acpi_object *object)
 {
-	char *buf = NULL;
-
-	buf = (char *)kmalloc(RESULT_STR_LEN, GFP_KERNEL);
-	if (buf)
-		memset(buf, 0, RESULT_STR_LEN);
-	else
-		goto do_fail;
+	char *buf;
 
+	buf = kzalloc(RESULT_STR_LEN, GFP_KERNEL);
+	if (!buf)
+		return NULL;
 	/* Now, just support integer type */
 	if (object->type == ACPI_TYPE_INTEGER)
 		sprintf(buf, "%d\n", (u32) object->integer.value);
-      do_fail:
-	return (buf);
+	return buf;
 }
 
 static int hotkey_polling_seq_show(struct seq_file *seq, void *offset)
@@ -486,98 +490,102 @@ static void free_hotkey_device(union acpi_hotkey *key)
 
 static void free_hotkey_buffer(union acpi_hotkey *key)
 {
+	/* key would never be null, action method could be */
 	kfree(key->event_hotkey.action_method);
 }
 
 static void free_poll_hotkey_buffer(union acpi_hotkey *key)
 {
+	/* key would never be null, others could be*/
 	kfree(key->poll_hotkey.action_method);
 	kfree(key->poll_hotkey.poll_method);
 	kfree(key->poll_hotkey.poll_result);
 }
 static int
-init_hotkey_device(union acpi_hotkey *key, char *bus_str, char *action_str,
-		   char *method, int std_num, int external_num)
+init_hotkey_device(union acpi_hotkey *key, char **config_entry,
+		   int std_num, int external_num)
 {
 	acpi_handle tmp_handle;
 	acpi_status status = AE_OK;
 
-
 	if (std_num < 0 || IS_POLL(std_num) || !key)
 		goto do_fail;
 
-	if (!bus_str || !action_str || !method)
+	if (!config_entry[bus_handle] || !config_entry[action_handle]
+			|| !config_entry[method])
 		goto do_fail;
 
 	key->link.hotkey_type = ACPI_HOTKEY_EVENT;
 	key->link.hotkey_standard_num = std_num;
 	key->event_hotkey.flag = 0;
-	key->event_hotkey.action_method = method;
+	key->event_hotkey.action_method = config_entry[method];
 
-	status =
-	    acpi_get_handle(NULL, bus_str, &(key->event_hotkey.bus_handle));
+	status = acpi_get_handle(NULL, config_entry[bus_handle],
+			   &(key->event_hotkey.bus_handle));
 	if (ACPI_FAILURE(status))
-		goto do_fail;
+		goto do_fail_zero;
 	key->event_hotkey.external_hotkey_num = external_num;
-	status =
-	    acpi_get_handle(NULL, action_str,
+	status = acpi_get_handle(NULL, config_entry[action_handle],
 			    &(key->event_hotkey.action_handle));
 	if (ACPI_FAILURE(status))
-		goto do_fail;
+		goto do_fail_zero;
 	status = acpi_get_handle(key->event_hotkey.action_handle,
-				 method, &tmp_handle);
+				 config_entry[method], &tmp_handle);
 	if (ACPI_FAILURE(status))
-		goto do_fail;
+		goto do_fail_zero;
 	return AE_OK;
-      do_fail:
+do_fail_zero:
+	key->event_hotkey.action_method = NULL;
+do_fail:
 	return -ENODEV;
 }
 
 static int
-init_poll_hotkey_device(union acpi_hotkey *key,
-			char *poll_str,
-			char *poll_method,
-			char *action_str, char *action_method, int std_num)
+init_poll_hotkey_device(union acpi_hotkey *key, char **config_entry,
+			int std_num)
 {
 	acpi_status status = AE_OK;
 	acpi_handle tmp_handle;
 
-
 	if (std_num < 0 || IS_EVENT(std_num) || !key)
 		goto do_fail;
-
-	if (!poll_str || !poll_method || !action_str || !action_method)
+	if (!config_entry[bus_handle] ||!config_entry[bus_method] ||
+		!config_entry[action_handle] || !config_entry[method])
 		goto do_fail;
 
 	key->link.hotkey_type = ACPI_HOTKEY_POLLING;
 	key->link.hotkey_standard_num = std_num;
 	key->poll_hotkey.flag = 0;
-	key->poll_hotkey.poll_method = poll_method;
-	key->poll_hotkey.action_method = action_method;
+	key->poll_hotkey.poll_method = config_entry[bus_method];
+	key->poll_hotkey.action_method = config_entry[method];
 
-	status =
-	    acpi_get_handle(NULL, poll_str, &(key->poll_hotkey.poll_handle));
+	status = acpi_get_handle(NULL, config_entry[bus_handle],
+		      &(key->poll_hotkey.poll_handle));
 	if (ACPI_FAILURE(status))
-		goto do_fail;
+		goto do_fail_zero;
 	status = acpi_get_handle(key->poll_hotkey.poll_handle,
-				 poll_method, &tmp_handle);
+				 config_entry[bus_method], &tmp_handle);
 	if (ACPI_FAILURE(status))
-		goto do_fail;
+		goto do_fail_zero;
 	status =
-	    acpi_get_handle(NULL, action_str,
+	    acpi_get_handle(NULL, config_entry[action_handle],
 			    &(key->poll_hotkey.action_handle));
 	if (ACPI_FAILURE(status))
-		goto do_fail;
+		goto do_fail_zero;
 	status = acpi_get_handle(key->poll_hotkey.action_handle,
-				 action_method, &tmp_handle);
+				 config_entry[method], &tmp_handle);
 	if (ACPI_FAILURE(status))
-		goto do_fail;
+		goto do_fail_zero;
 	key->poll_hotkey.poll_result =
 	    (union acpi_object *)kmalloc(sizeof(union acpi_object), GFP_KERNEL);
 	if (!key->poll_hotkey.poll_result)
-		goto do_fail;
+		goto do_fail_zero;
 	return AE_OK;
-      do_fail:
+
+do_fail_zero:
+	key->poll_hotkey.poll_method = NULL;
+	key->poll_hotkey.action_method = NULL;
+do_fail:
 	return -ENODEV;
 }
 
@@ -652,17 +660,18 @@ static int hotkey_poll_config_seq_show(struct seq_file *seq, void *offset)
 }
 
 static int
-get_parms(char *config_record,
-	  int *cmd,
-	  char **bus_handle,
-	  char **bus_method,
-	  char **action_handle,
-	  char **method, int *internal_event_num, int *external_event_num)
+get_parms(char *config_record, int *cmd, char **config_entry,
+	       int *internal_event_num, int *external_event_num)
 {
+/* the format of *config_record =
+ * "1:\d+:*" : "cmd:internal_event_num"
+ * "\d+:\w+:\w+:\w+:\w+:\d+:\d+" :
+ * "cmd:bus_handle:bus_method:action_handle:method:internal_event_num:external_event_num"
+ */
 	char *tmp, *tmp1, count;
+	int i;
 
 	sscanf(config_record, "%d", cmd);
-
 	if (*cmd == 1) {
 		if (sscanf(config_record, "%d:%d", cmd, internal_event_num) !=
 		    2)
@@ -674,59 +683,27 @@ get_parms(char *config_record,
 	if (!tmp)
 		goto do_fail;
 	tmp++;
-	tmp1 = strchr(tmp, ':');
-	if (!tmp1)
-		goto do_fail;
-
-	count = tmp1 - tmp;
-	*bus_handle = (char *)kmalloc(count + 1, GFP_KERNEL);
-	if (!*bus_handle)
-		goto do_fail;
-	strncpy(*bus_handle, tmp, count);
-	*(*bus_handle + count) = 0;
-
-	tmp = tmp1;
-	tmp++;
-	tmp1 = strchr(tmp, ':');
-	if (!tmp1)
-		goto do_fail;
-	count = tmp1 - tmp;
-	*bus_method = (char *)kmalloc(count + 1, GFP_KERNEL);
-	if (!*bus_method)
-		goto do_fail;
-	strncpy(*bus_method, tmp, count);
-	*(*bus_method + count) = 0;
-
-	tmp = tmp1;
-	tmp++;
-	tmp1 = strchr(tmp, ':');
-	if (!tmp1)
-		goto do_fail;
-	count = tmp1 - tmp;
-	*action_handle = (char *)kmalloc(count + 1, GFP_KERNEL);
-	if (!*action_handle)
-		goto do_fail;
-	strncpy(*action_handle, tmp, count);
-	*(*action_handle + count) = 0;
-
-	tmp = tmp1;
-	tmp++;
-	tmp1 = strchr(tmp, ':');
-	if (!tmp1)
-		goto do_fail;
-	count = tmp1 - tmp;
-	*method = (char *)kmalloc(count + 1, GFP_KERNEL);
-	if (!*method)
-		goto do_fail;
-	strncpy(*method, tmp, count);
-	*(*method + count) = 0;
-
-	if (sscanf(tmp1 + 1, "%d:%d", internal_event_num, external_event_num) <=
-	    0)
-		goto do_fail;
-
-	return 6;
-      do_fail:
+	for (i = 0; i < LAST_CONF_ENTRY; i++) {
+		tmp1 = strchr(tmp, ':');
+		if (!tmp1) {
+			goto do_fail;
+		}
+		count = tmp1 - tmp;
+		config_entry[i] = kzalloc(count + 1, GFP_KERNEL);
+		if (!config_entry[i])
+			goto handle_failure;
+		strncpy(config_entry[i], tmp, count);
+		tmp = tmp1 + 1;
+	}
+	if (sscanf(tmp, "%d:%d", internal_event_num, external_event_num) <= 0)
+		goto handle_failure;
+	if (!IS_OTHERS(*internal_event_num)) {
+		return 6;
+	}
+handle_failure:
+	while (i-- > 0)
+		kfree(config_entry[i]);
+do_fail:
 	return -1;
 }
 
@@ -736,50 +713,34 @@ static ssize_t hotkey_write_config(struct file *file,
 				   size_t count, loff_t * data)
 {
 	char *config_record = NULL;
-	char *bus_handle = NULL;
-	char *bus_method = NULL;
-	char *action_handle = NULL;
-	char *method = NULL;
+	char *config_entry[LAST_CONF_ENTRY];
 	int cmd, internal_event_num, external_event_num;
 	int ret = 0;
-	union acpi_hotkey *key = NULL;
+	union acpi_hotkey *key = kzalloc(sizeof(union acpi_hotkey), GFP_KERNEL);
 
+	if (!key)
+		return -ENOMEM;
 
-	config_record = (char *)kmalloc(count + 1, GFP_KERNEL);
-	if (!config_record)
+	config_record = kzalloc(count + 1, GFP_KERNEL);
+	if (!config_record) {
+		kfree(key);
 		return -ENOMEM;
+	}
 
 	if (copy_from_user(config_record, buffer, count)) {
 		kfree(config_record);
+		kfree(key);
 		printk(KERN_ERR PREFIX "Invalid data\n");
 		return -EINVAL;
 	}
-	config_record[count] = 0;
-
-	ret = get_parms(config_record,
-			&cmd,
-			&bus_handle,
-			&bus_method,
-			&action_handle,
-			&method, &internal_event_num, &external_event_num);
-
+	ret = get_parms(config_record, &cmd, config_entry,
+		       &internal_event_num, &external_event_num);
 	kfree(config_record);
-	if (IS_OTHERS(internal_event_num))
-		goto do_fail;
 	if (ret != 6) {
-	      do_fail:
-		kfree(bus_handle);
-		kfree(bus_method);
-		kfree(action_handle);
-		kfree(method);
 		printk(KERN_ERR PREFIX "Invalid data format ret=%d\n", ret);
 		return -EINVAL;
 	}
 
-	key = kmalloc(sizeof(union acpi_hotkey), GFP_KERNEL);
-	if (!key)
-		goto do_fail;
-	memset(key, 0, sizeof(union acpi_hotkey));
 	if (cmd == 1) {
 		union acpi_hotkey *tmp = NULL;
 		tmp = get_hotkey_by_event(&global_hotkey_list,
@@ -791,34 +752,19 @@ static ssize_t hotkey_write_config(struct file *file,
 		goto cont_cmd;
 	}
 	if (IS_EVENT(internal_event_num)) {
-		kfree(bus_method);
-		ret = init_hotkey_device(key, bus_handle, action_handle, method,
-					 internal_event_num,
-					 external_event_num);
-	} else
-		ret = init_poll_hotkey_device(key, bus_handle, bus_method,
-					      action_handle, method,
-					      internal_event_num);
-	if (ret) {
-		kfree(bus_handle);
-		kfree(action_handle);
-		if (IS_EVENT(internal_event_num))
-			free_hotkey_buffer(key);
-		else
-			free_poll_hotkey_buffer(key);
-		kfree(key);
-		printk(KERN_ERR PREFIX "Invalid hotkey\n");
-		return -EINVAL;
+		if (init_hotkey_device(key, config_entry,
+			internal_event_num, external_event_num))
+			goto init_hotkey_fail;
+	} else {
+		if (init_poll_hotkey_device(key, config_entry,
+			       internal_event_num))
+			goto init_poll_hotkey_fail;
 	}
-
-      cont_cmd:
-	kfree(bus_handle);
-	kfree(action_handle);
-
+cont_cmd:
 	switch (cmd) {
 	case 0:
-		if (get_hotkey_by_event
-		    (&global_hotkey_list, key->link.hotkey_standard_num))
+		if (get_hotkey_by_event(&global_hotkey_list,
+				key->link.hotkey_standard_num))
 			goto fail_out;
 		else
 			hotkey_add(key);
@@ -827,6 +773,7 @@ static ssize_t hotkey_write_config(struct file *file,
 		hotkey_remove(key);
 		break;
 	case 2:
+		/* key is kfree()ed if matched*/
 		if (hotkey_update(key))
 			goto fail_out;
 		break;
@@ -835,11 +782,22 @@ static ssize_t hotkey_write_config(struct file *file,
 		break;
 	}
 	return count;
-      fail_out:
-	if (IS_EVENT(internal_event_num))
-		free_hotkey_buffer(key);
-	else
-		free_poll_hotkey_buffer(key);
+
+init_poll_hotkey_fail:		/* failed init_poll_hotkey_device */
+	kfree(config_entry[bus_method]);
+	config_entry[bus_method] = NULL;
+init_hotkey_fail:		/* failed init_hotkey_device */
+	kfree(config_entry[method]);
+fail_out:
+	kfree(config_entry[bus_handle]);
+	kfree(config_entry[action_handle]);
+	/* No double free since elements =NULL for error cases */
+	if (IS_EVENT(internal_event_num)) {
+		if (config_entry[bus_method])
+			kfree(config_entry[bus_method]);
+		free_hotkey_buffer(key);	/* frees [method] */
+	} else
+		free_poll_hotkey_buffer(key);  /* frees [bus_method]+[method] */
 	kfree(key);
 	printk(KERN_ERR PREFIX "invalid key\n");
 	return -EINVAL;
@@ -923,10 +881,9 @@ static ssize_t hotkey_execute_aml_method(struct file *file,
 	union acpi_hotkey *key;
 
 
-	arg = (char *)kmalloc(count + 1, GFP_KERNEL);
+	arg = kzalloc(count + 1, GFP_KERNEL);
 	if (!arg)
 		return -ENOMEM;
-	arg[count] = 0;
 
 	if (copy_from_user(arg, buffer, count)) {
 		kfree(arg);
-- 
cgit v0.10.2


From d68909f4c3eee09c13d4e5c86512c6c075553dbd Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 16 Aug 2006 19:16:58 -0400
Subject: ACPI: avoid irqrouter_resume might_sleep oops on resume from S4

__might_sleep+0x8e/0x93
acpi_os_wait_semaphore+0x50/0xa3
acpi_ut_acquire_mutex+0x28/0x6a
acpi_ns_get_node+0x46/0x88
acpi_ns_evaluate+0x2d/0xfc
acpi_rs_set_srs_method_data+0xc5/0xe1
acpi_set_current_resources+0x31/0x3f
acpi_pci_link_set+0xfc/0x1a5
irqrouter_resume+0x48/0x5f

and

__might_sleep+0x8e/0x93
kmem_cache_alloc+0x2a/0x8f
acpi_evaluate_integer+0x32/0x96
acpi_bus_get_status+0x30/0x84
acpi_pci_link_set+0x12a/0x1a5
irqrouter_resume+0x48/0x5f

http://bugzilla.kernel.org/show_bug.cgi?id=6810

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index b7d1514..507f051 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -746,6 +746,16 @@ acpi_status acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 timeout)
 	ACPI_DEBUG_PRINT((ACPI_DB_MUTEX, "Waiting for semaphore[%p|%d|%d]\n",
 			  handle, units, timeout));
 
+	/*
+	 * This can be called during resume with interrupts off.
+	 * Like boot-time, we should be single threaded and will
+	 * always get the lock if we try -- timeout or not.
+	 * If this doesn't succeed, then we will oops courtesy of
+	 * might_sleep() in down().
+	 */
+	if (!down_trylock(sem))
+		return AE_OK;
+
 	switch (timeout) {
 		/*
 		 * No Wait:
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index f48227f..d0d84c4 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -262,7 +262,7 @@ acpi_evaluate_integer(acpi_handle handle,
 	if (!data)
 		return AE_BAD_PARAMETER;
 
-	element = kmalloc(sizeof(union acpi_object), GFP_KERNEL);
+	element = kmalloc(sizeof(union acpi_object), irqs_disabled() ? GFP_ATOMIC: GFP_KERNEL);
 	if (!element)
 		return AE_NO_MEMORY;
 
-- 
cgit v0.10.2


From f57e1abd1bb297994c7398478b4c37e628095243 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 18 Aug 2006 15:32:10 +0100
Subject: [ARM] 3753/1: S3C24XX: DMA fixes

Patch from Ben Dooks

A number of small issues with the S3C24XX DMA have
cropped up, which this patch fixes. These are:

  - check whether we can load another buff in start
  - update state handling in s3c2410_dma_lastxfer
  - only reload in irq if channel is not idle
  - more informative timeout errors (add source)
  - do not call request_irq() with irqs locked
  - added waitforstop function

The patch also adds a S3C2410_DMAOP_STARTED for
the occasions when the driver wants to ensure that
the DMA system load state is resynced after loading.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c
index 094cc52..2585545 100644
--- a/arch/arm/mach-s3c2410/dma.c
+++ b/arch/arm/mach-s3c2410/dma.c
@@ -112,7 +112,7 @@ dmadbg_capture(s3c2410_dma_chan_t *chan, struct s3c2410_dma_regstate *regs)
 }
 
 static void
-dmadbg_showregs(const char *fname, int line, s3c2410_dma_chan_t *chan,
+dmadbg_dumpregs(const char *fname, int line, s3c2410_dma_chan_t *chan,
 		 struct s3c2410_dma_regstate *regs)
 {
 	printk(KERN_DEBUG "dma%d: %s:%d: DCSRC=%08lx, DISRC=%08lx, DSTAT=%08lx DMT=%02lx, DCON=%08lx\n",
@@ -132,7 +132,16 @@ dmadbg_showchan(const char *fname, int line, s3c2410_dma_chan_t *chan)
 	       chan->number, fname, line, chan->load_state,
 	       chan->curr, chan->next, chan->end);
 
-	dmadbg_showregs(fname, line, chan, &state);
+	dmadbg_dumpregs(fname, line, chan, &state);
+}
+
+static void
+dmadbg_showregs(const char *fname, int line, s3c2410_dma_chan_t *chan)
+{
+	struct s3c2410_dma_regstate state;
+
+	dmadbg_capture(chan, &state);
+	dmadbg_dumpregs(fname, line, chan, &state);
 }
 
 #define dbg_showregs(chan) dmadbg_showregs(__FUNCTION__, __LINE__, (chan))
@@ -253,10 +262,14 @@ s3c2410_dma_loadbuffer(s3c2410_dma_chan_t *chan,
 			 buf->next);
 		reload = (buf->next == NULL) ? S3C2410_DCON_NORELOAD : 0;
 	} else {
-		pr_debug("load_state is %d => autoreload\n", chan->load_state);
+		//pr_debug("load_state is %d => autoreload\n", chan->load_state);
 		reload = S3C2410_DCON_AUTORELOAD;
 	}
 
+	if ((buf->data & 0xf0000000) != 0x30000000) {
+		dmawarn("dmaload: buffer is %p\n", (void *)buf->data);
+	}
+
 	writel(buf->data, chan->addr_reg);
 
 	dma_wrreg(chan, S3C2410_DMA_DCON,
@@ -370,7 +383,7 @@ static int s3c2410_dma_start(s3c2410_dma_chan_t *chan)
 	tmp |= S3C2410_DMASKTRIG_ON;
 	dma_wrreg(chan, S3C2410_DMA_DMASKTRIG, tmp);
 
-	pr_debug("wrote %08lx to DMASKTRIG\n", tmp);
+	pr_debug("dma%d: %08lx to DMASKTRIG\n", chan->number, tmp);
 
 #if 0
 	/* the dma buffer loads should take care of clearing the AUTO
@@ -384,7 +397,30 @@ static int s3c2410_dma_start(s3c2410_dma_chan_t *chan)
 
 	dbg_showchan(chan);
 
+	/* if we've only loaded one buffer onto the channel, then check
+	 * to see if we have another, and if so, try and load it so when
+	 * the first buffer is finished, the new one will be loaded onto
+	 * the channel */
+
+	if (chan->next != NULL) {
+		if (chan->load_state == S3C2410_DMALOAD_1LOADED) {
+
+			if (s3c2410_dma_waitforload(chan, __LINE__) == 0) {
+				pr_debug("%s: buff not yet loaded, no more todo\n",
+					 __FUNCTION__);
+			} else {
+				chan->load_state = S3C2410_DMALOAD_1RUNNING;
+				s3c2410_dma_loadbuffer(chan, chan->next);
+			}
+
+		} else if (chan->load_state == S3C2410_DMALOAD_1RUNNING) {
+			s3c2410_dma_loadbuffer(chan, chan->next);
+		}
+	}
+
+
 	local_irq_restore(flags);
+
 	return 0;
 }
 
@@ -436,12 +472,11 @@ int s3c2410_dma_enqueue(unsigned int channel, void *id,
 	buf = kmem_cache_alloc(dma_kmem, GFP_ATOMIC);
 	if (buf == NULL) {
 		pr_debug("%s: out of memory (%ld alloc)\n",
-			 __FUNCTION__, sizeof(*buf));
+			 __FUNCTION__, (long)sizeof(*buf));
 		return -ENOMEM;
 	}
 
-	pr_debug("%s: new buffer %p\n", __FUNCTION__, buf);
-
+	//pr_debug("%s: new buffer %p\n", __FUNCTION__, buf);
 	//dbg_showchan(chan);
 
 	buf->next  = NULL;
@@ -537,14 +572,20 @@ s3c2410_dma_lastxfer(s3c2410_dma_chan_t *chan)
 	case S3C2410_DMALOAD_1LOADED:
 		if (s3c2410_dma_waitforload(chan, __LINE__) == 0) {
 				/* flag error? */
-			printk(KERN_ERR "dma%d: timeout waiting for load\n",
-			       chan->number);
+			printk(KERN_ERR "dma%d: timeout waiting for load (%s)\n",
+			       chan->number, __FUNCTION__);
 			return;
 		}
 		break;
 
+	case S3C2410_DMALOAD_1LOADED_1RUNNING:
+		/* I believe in this case we do not have anything to do
+		 * until the next buffer comes along, and we turn off the
+		 * reload */
+		return;
+
 	default:
-		pr_debug("dma%d: lastxfer: unhandled load_state %d with no next",
+		pr_debug("dma%d: lastxfer: unhandled load_state %d with no next\n",
 			 chan->number, chan->load_state);
 		return;
 
@@ -629,7 +670,14 @@ s3c2410_dma_irq(int irq, void *devpw, struct pt_regs *regs)
 	} else {
 	}
 
-	if (chan->next != NULL) {
+	/* only reload if the channel is still running... our buffer done
+	 * routine may have altered the state by requesting the dma channel
+	 * to stop or shutdown... */
+
+	/* todo: check that when the channel is shut-down from inside this
+	 * function, we cope with unsetting reload, etc */
+
+	if (chan->next != NULL && chan->state != S3C2410_DMA_IDLE) {
 		unsigned long flags;
 
 		switch (chan->load_state) {
@@ -644,8 +692,8 @@ s3c2410_dma_irq(int irq, void *devpw, struct pt_regs *regs)
 		case S3C2410_DMALOAD_1LOADED:
 			if (s3c2410_dma_waitforload(chan, __LINE__) == 0) {
 				/* flag error? */
-				printk(KERN_ERR "dma%d: timeout waiting for load\n",
-				       chan->number);
+				printk(KERN_ERR "dma%d: timeout waiting for load (%s)\n",
+				       chan->number, __FUNCTION__);
 				return IRQ_HANDLED;
 			}
 
@@ -678,8 +726,6 @@ s3c2410_dma_irq(int irq, void *devpw, struct pt_regs *regs)
 	return IRQ_HANDLED;
 }
 
-
-
 /* s3c2410_request_dma
  *
  * get control of an dma channel
@@ -718,11 +764,17 @@ int s3c2410_dma_request(unsigned int channel, s3c2410_dma_client_t *client,
 		pr_debug("dma%d: %s : requesting irq %d\n",
 			 channel, __FUNCTION__, chan->irq);
 
+		chan->irq_claimed = 1;
+		local_irq_restore(flags);
+
 		err = request_irq(chan->irq, s3c2410_dma_irq, IRQF_DISABLED,
 				  client->name, (void *)chan);
 
+		local_irq_save(flags);
+
 		if (err) {
 			chan->in_use = 0;
+			chan->irq_claimed = 0;
 			local_irq_restore(flags);
 
 			printk(KERN_ERR "%s: cannot get IRQ %d for DMA %d\n",
@@ -730,7 +782,6 @@ int s3c2410_dma_request(unsigned int channel, s3c2410_dma_client_t *client,
 			return err;
 		}
 
-		chan->irq_claimed = 1;
 		chan->irq_enabled = 1;
 	}
 
@@ -810,6 +861,7 @@ static int s3c2410_dma_dostop(s3c2410_dma_chan_t *chan)
 
 	tmp = dma_rdreg(chan, S3C2410_DMA_DMASKTRIG);
 	tmp |= S3C2410_DMASKTRIG_STOP;
+	//tmp &= ~S3C2410_DMASKTRIG_ON;
 	dma_wrreg(chan, S3C2410_DMA_DMASKTRIG, tmp);
 
 #if 0
@@ -819,6 +871,7 @@ static int s3c2410_dma_dostop(s3c2410_dma_chan_t *chan)
 	dma_wrreg(chan, S3C2410_DMA_DCON, tmp);
 #endif
 
+	/* should stop do this, or should we wait for flush? */
 	chan->state      = S3C2410_DMA_IDLE;
 	chan->load_state = S3C2410_DMALOAD_NONE;
 
@@ -827,6 +880,22 @@ static int s3c2410_dma_dostop(s3c2410_dma_chan_t *chan)
 	return 0;
 }
 
+void s3c2410_dma_waitforstop(s3c2410_dma_chan_t *chan)
+{
+	unsigned long tmp;
+	unsigned int timeout = 0x10000;
+
+	while (timeout-- > 0) {
+		tmp = dma_rdreg(chan, S3C2410_DMA_DMASKTRIG);
+
+		if (!(tmp & S3C2410_DMASKTRIG_ON))
+			return;
+	}
+
+	pr_debug("dma%d: failed to stop?\n", chan->number);
+}
+
+
 /* s3c2410_dma_flush
  *
  * stop the channel, and remove all current and pending transfers
@@ -837,7 +906,9 @@ static int s3c2410_dma_flush(s3c2410_dma_chan_t *chan)
 	s3c2410_dma_buf_t *buf, *next;
 	unsigned long flags;
 
-	pr_debug("%s:\n", __FUNCTION__);
+	pr_debug("%s: chan %p (%d)\n", __FUNCTION__, chan, chan->number);
+
+	dbg_showchan(chan);
 
 	local_irq_save(flags);
 
@@ -864,11 +935,64 @@ static int s3c2410_dma_flush(s3c2410_dma_chan_t *chan)
 		}
 	}
 
+	dbg_showregs(chan);
+
+	s3c2410_dma_waitforstop(chan);
+
+#if 0
+	/* should also clear interrupts, according to WinCE BSP */
+	{
+		unsigned long tmp;
+
+		tmp = dma_rdreg(chan, S3C2410_DMA_DCON);
+		tmp |= S3C2410_DCON_NORELOAD;
+		dma_wrreg(chan, S3C2410_DMA_DCON, tmp);
+	}
+#endif
+
+	dbg_showregs(chan);
+
 	local_irq_restore(flags);
 
 	return 0;
 }
 
+int
+s3c2410_dma_started(s3c2410_dma_chan_t *chan)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	dbg_showchan(chan);
+
+	/* if we've only loaded one buffer onto the channel, then check
+	 * to see if we have another, and if so, try and load it so when
+	 * the first buffer is finished, the new one will be loaded onto
+	 * the channel */
+
+	if (chan->next != NULL) {
+		if (chan->load_state == S3C2410_DMALOAD_1LOADED) {
+
+			if (s3c2410_dma_waitforload(chan, __LINE__) == 0) {
+				pr_debug("%s: buff not yet loaded, no more todo\n",
+					 __FUNCTION__);
+			} else {
+				chan->load_state = S3C2410_DMALOAD_1RUNNING;
+				s3c2410_dma_loadbuffer(chan, chan->next);
+			}
+
+		} else if (chan->load_state == S3C2410_DMALOAD_1RUNNING) {
+			s3c2410_dma_loadbuffer(chan, chan->next);
+		}
+	}
+
+
+	local_irq_restore(flags);
+
+	return 0;
+
+}
 
 int
 s3c2410_dma_ctrl(dmach_t channel, s3c2410_chan_op_t op)
@@ -885,14 +1009,15 @@ s3c2410_dma_ctrl(dmach_t channel, s3c2410_chan_op_t op)
 		return s3c2410_dma_dostop(chan);
 
 	case S3C2410_DMAOP_PAUSE:
-		return -ENOENT;
-
 	case S3C2410_DMAOP_RESUME:
 		return -ENOENT;
 
 	case S3C2410_DMAOP_FLUSH:
 		return s3c2410_dma_flush(chan);
 
+	case S3C2410_DMAOP_STARTED:
+		return s3c2410_dma_started(chan);
+
 	case S3C2410_DMAOP_TIMEOUT:
 		return 0;
 
diff --git a/include/asm-arm/arch-s3c2410/dma.h b/include/asm-arm/arch-s3c2410/dma.h
index 72964f9..7463fd5 100644
--- a/include/asm-arm/arch-s3c2410/dma.h
+++ b/include/asm-arm/arch-s3c2410/dma.h
@@ -104,6 +104,7 @@ enum s3c2410_chan_op_e {
 	S3C2410_DMAOP_RESUME,
 	S3C2410_DMAOP_FLUSH,
 	S3C2410_DMAOP_TIMEOUT,           /* internal signal to handler */
+	S3C2410_DMAOP_STARTED,		/* indicate channel started */
 };
 
 typedef enum s3c2410_chan_op_e s3c2410_chan_op_t;
-- 
cgit v0.10.2


From 332158e7c206dc6bee0dfb24f2c1d6096e0989cd Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Fri, 18 Aug 2006 15:32:12 +0100
Subject: [ARM] 3754/1: S3C24XX: tidy arch/arm/mach-s3c2410/Makefile

Patch from Ben Dooks

tidy up the makefile by using TABs to indent, and ensure
that all items are indented the same.

Move the DMA to its own section, ready for the next set
of updates

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-s3c2410/Makefile b/arch/arm/mach-s3c2410/Makefile
index 0c79386..273e05f 100644
--- a/arch/arm/mach-s3c2410/Makefile
+++ b/arch/arm/mach-s3c2410/Makefile
@@ -10,45 +10,47 @@ obj-m			:=
 obj-n			:=
 obj-			:=
 
+# DMA
+obj-$(CONFIG_S3C2410_DMA)	+= dma.o
+
 # S3C2400 support files
-obj-$(CONFIG_CPU_S3C2400)  += s3c2400-gpio.o
+obj-$(CONFIG_CPU_S3C2400)	+= s3c2400-gpio.o
 
 # S3C2410 support files
 
-obj-$(CONFIG_CPU_S3C2410)  += s3c2410.o
-obj-$(CONFIG_CPU_S3C2410)  += s3c2410-gpio.o
-obj-$(CONFIG_S3C2410_DMA)  += dma.o
+obj-$(CONFIG_CPU_S3C2410)	+= s3c2410.o
+obj-$(CONFIG_CPU_S3C2410)	+= s3c2410-gpio.o
 
 # Power Management support
 
-obj-$(CONFIG_PM)	   += pm.o sleep.o
-obj-$(CONFIG_PM_SIMTEC)	   += pm-simtec.o
+obj-$(CONFIG_PM)		+= pm.o sleep.o
+obj-$(CONFIG_PM_SIMTEC)		+= pm-simtec.o
 
 # S3C2412 support
-obj-$(CONFIG_CPU_S3C2412)  += s3c2412.o
-obj-$(CONFIG_CPU_S3C2412)  += s3c2412-clock.o
+obj-$(CONFIG_CPU_S3C2412)	+= s3c2412.o
+obj-$(CONFIG_CPU_S3C2412)	+= s3c2412-clock.o
 
 #
 # S3C244X support
 
-obj-$(CONFIG_CPU_S3C244X)  += s3c244x.o
-obj-$(CONFIG_CPU_S3C244X)  += s3c244x-irq.o
+obj-$(CONFIG_CPU_S3C244X)	+= s3c244x.o
+obj-$(CONFIG_CPU_S3C244X)	+= s3c244x-irq.o
 
 # Clock control
 
-obj-$(CONFIG_S3C2410_CLOCK) += s3c2410-clock.o
+obj-$(CONFIG_S3C2410_CLOCK)	+= s3c2410-clock.o
 
 # S3C2440 support
 
-obj-$(CONFIG_CPU_S3C2440)  += s3c2440.o s3c2440-dsc.o
-obj-$(CONFIG_CPU_S3C2440)  += s3c2440-irq.o
-obj-$(CONFIG_CPU_S3C2440)  += s3c2440-clock.o
-obj-$(CONFIG_CPU_S3C2440)  += s3c2410-gpio.o
+obj-$(CONFIG_CPU_S3C2440)	+= s3c2440.o s3c2440-dsc.o
+obj-$(CONFIG_CPU_S3C2440)	+= s3c2440-irq.o
+obj-$(CONFIG_CPU_S3C2440)	+= s3c2440-clock.o
+obj-$(CONFIG_CPU_S3C2440)	+= s3c2410-gpio.o
 
 # S3C2442 support
 
-obj-$(CONFIG_CPU_S3C2442)  += s3c2442.o
-obj-$(CONFIG_CPU_S3C2442)  += s3c2442-clock.o
+obj-$(CONFIG_CPU_S3C2442)	+= s3c2442.o
+obj-$(CONFIG_CPU_S3C2442)	+= s3c2442-clock.o
 
 # bast extras
 
-- 
cgit v0.10.2


From e2785f0d4251c35d6e82ec819354374c8c17d63d Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@mvista.com>
Date: Fri, 18 Aug 2006 15:32:14 +0100
Subject: [ARM] 3755/1: dmabounce: fix return value for find_safe_buffer

Patch from Kevin Hilman

Previous locking changes to dmabounce incorrectly return non-NULL even
when buffer not found.  Fix it up.

Signed-off-by: Kevin Hilman <khilman@mvista.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
index 5b7c263..028bdc9 100644
--- a/arch/arm/common/dmabounce.c
+++ b/arch/arm/common/dmabounce.c
@@ -179,17 +179,19 @@ alloc_safe_buffer(struct dmabounce_device_info *device_info, void *ptr,
 static inline struct safe_buffer *
 find_safe_buffer(struct dmabounce_device_info *device_info, dma_addr_t safe_dma_addr)
 {
-	struct safe_buffer *b = NULL;
+	struct safe_buffer *b, *rb = NULL;
 	unsigned long flags;
 
 	read_lock_irqsave(&device_info->lock, flags);
 
 	list_for_each_entry(b, &device_info->safe_buffers, node)
-		if (b->safe_dma_addr == safe_dma_addr)
+		if (b->safe_dma_addr == safe_dma_addr) {
+			rb = b;
 			break;
+		}
 
 	read_unlock_irqrestore(&device_info->lock, flags);
-	return b;
+	return rb;
 }
 
 static inline void
-- 
cgit v0.10.2


From 3a834635e8953839dba3f417a26414039b0c7c48 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@gmail.com>
Date: Fri, 18 Aug 2006 15:32:17 +0100
Subject: [ARM] 3756/1: Assign value for HWCAP_IWMMXT

Patch from Paul Gortmaker

Some folks here at Wind River asked me if I'd push this out
so that the value was generally agreed upon in advance by
all folks interested in working with iWMMXt.  Seems simple
enough...

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/include/asm-arm/procinfo.h b/include/asm-arm/procinfo.h
index edb7b65..91a31ad 100644
--- a/include/asm-arm/procinfo.h
+++ b/include/asm-arm/procinfo.h
@@ -55,5 +55,6 @@ extern unsigned int elf_hwcap;
 #define HWCAP_VFP	64
 #define HWCAP_EDSP	128
 #define HWCAP_JAVA	256
+#define HWCAP_IWMMXT	512
 
 #endif
-- 
cgit v0.10.2


From 90af774ab150b97c0aa8ed0375f4652be986bdce Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 18 Aug 2006 15:34:46 +0100
Subject: [ARM] 3757/1: Use PROCINFO_INITFUNC in head.S

Patch from Catalin Marinas

This is instead of a magic number.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 4fe386e..5365d4e 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -118,7 +118,7 @@ ENTRY(secondary_startup)
 	sub	r4, r4, r5			@ mmu has been enabled
 	ldr	r4, [r7, r4]			@ get secondary_data.pgdir
 	adr	lr, __enable_mmu		@ return address
-	add	pc, r10, #12			@ initialise processor
+	add	pc, r10, #PROCINFO_INITFUNC	@ initialise processor
 						@ (return control reg)
 
 	/*
-- 
cgit v0.10.2


From ba9b5d76372dc290b6ca04dad93927a22c2ac49a Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Fri, 18 Aug 2006 17:20:15 +0100
Subject: [ARM] 3746/2: Userspace helpers must be Thumb mode interworkable

Patch from Nicolas Pitre

The userspace helpers in clean/arch/arm/kernel/entry-armv.S are called
directly in/from userspace. They need to cope with being called from
Thumb code.

Patch below uses the bx interworking instruction when
CONFIG_ARM_THUMB=y.

Based on an earlier patch from Paul Brook <paul@codesourcery.com>

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 7ea5f01..de4e331 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -634,6 +634,14 @@ ENTRY(__switch_to)
  * purpose.
  */
 
+	.macro	usr_ret, reg
+#ifdef CONFIG_ARM_THUMB
+	bx	\reg
+#else
+	mov	pc, \reg
+#endif
+	.endm
+
 	.align	5
 	.globl	__kuser_helper_start
 __kuser_helper_start:
@@ -675,7 +683,7 @@ __kuser_memory_barrier:				@ 0xffff0fa0
 #if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_SMP)
 	mcr	p15, 0, r0, c7, c10, 5	@ dmb
 #endif
-	mov	pc, lr
+	usr_ret	lr
 
 	.align	5
 
@@ -778,7 +786,7 @@ __kuser_cmpxchg:				@ 0xffff0fc0
 	mov	r0, #-1
 	adds	r0, r0, #0
 #endif
-	mov	pc, lr
+	usr_ret	lr
 
 #else
 
@@ -792,7 +800,7 @@ __kuser_cmpxchg:				@ 0xffff0fc0
 #ifdef CONFIG_SMP
 	mcr	p15, 0, r0, c7, c10, 5	@ dmb
 #endif
-	mov	pc, lr
+	usr_ret	lr
 
 #endif
 
@@ -834,16 +842,11 @@ __kuser_cmpxchg:				@ 0xffff0fc0
 __kuser_get_tls:				@ 0xffff0fe0
 
 #if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL)
-
 	ldr	r0, [pc, #(16 - 8)]		@ TLS stored at 0xffff0ff0
-	mov	pc, lr
-
 #else
-
 	mrc	p15, 0, r0, c13, c0, 3		@ read TLS register
-	mov	pc, lr
-
 #endif
+	usr_ret	lr
 
 	.rep	5
 	.word	0			@ pad up to __kuser_helper_version
-- 
cgit v0.10.2


From df6fd31995cb2e38b2a7e94bc8f1559b8f55404e Mon Sep 17 00:00:00 2001
From: "Starikovskiy, Alexey Y" <alexey.y.starikovskiy@intel.com>
Date: Fri, 18 Aug 2006 11:23:00 -0400
Subject: ACPI: relax BAD_MADT_ENTRY check to allow LSAPIC variable length
 string UIDs

ACPI 3.0 appended a variable length UID string to the LAPIC structure
as part of support for > 256 processors.  So the BAD_MADT_ENTRY() sanity
check can no longer compare for equality with a fixed structure length.

Signed-off-by: Alexey Y Starikovskiy <alexey.y.starikovskiy@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 0db6387..ee003bc 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -59,7 +59,7 @@ static inline int gsi_irq_sharing(int gsi) { return gsi; }
 
 #define BAD_MADT_ENTRY(entry, end) (					    \
 		(!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
-		((acpi_table_entry_header *)entry)->length != sizeof(*entry))
+		((acpi_table_entry_header *)entry)->length < sizeof(*entry))
 
 #define PREFIX			"ACPI: "
 
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 99761b8..0176556 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -55,7 +55,7 @@
 
 #define BAD_MADT_ENTRY(entry, end) (                                        \
 		(!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
-		((acpi_table_entry_header *)entry)->length != sizeof(*entry))
+		((acpi_table_entry_header *)entry)->length < sizeof(*entry))
 
 #define PREFIX			"ACPI: "
 
-- 
cgit v0.10.2


From 5beba53230351b2d77c317c22e66c415f2ebaf02 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Fri, 18 Aug 2006 10:41:46 -0700
Subject: IB/mthca: No userspace SRQs if HCA doesn't have SRQ support

Leave all SRQ methods out of the device's uverbs_cmd_mask if the
device doesn't have SRQ support (because of ancient firmware) so that
we don't allow userspace to call the driver's create_srq method.  This
fixes a userspace-triggerable oops caused by ib_uverbs_create_srq()
following the device's ->create_srq function pointer, which will be
NULL if the device doesn't support SRQs.

Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 230ae21..265b1d1 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1287,11 +1287,7 @@ int mthca_register_device(struct mthca_dev *dev)
 		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
 		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
 		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
-		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
-		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
-		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
-		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
-		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
 	dev->ib_dev.node_type            = IB_NODE_CA;
 	dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
 	dev->ib_dev.dma_device           = &dev->pdev->dev;
@@ -1316,6 +1312,11 @@ int mthca_register_device(struct mthca_dev *dev)
 		dev->ib_dev.modify_srq           = mthca_modify_srq;
 		dev->ib_dev.query_srq            = mthca_query_srq;
 		dev->ib_dev.destroy_srq          = mthca_destroy_srq;
+		dev->ib_dev.uverbs_cmd_mask	|=
+			(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
+			(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
+			(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
+			(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
 
 		if (mthca_is_memfree(dev))
 			dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv;
-- 
cgit v0.10.2


From 33ccf8d1080bdccb4751a92f6da361a6e01b7cc0 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Thu, 17 Aug 2006 11:57:58 -0400
Subject: [SCSI] lpfc 8.1.9 : Misc Bug Fixes

Misc Bug Fixes:
- Cap MBX_DOWN_LINK command timeout to 60 seconds
- Fix double free of ndlp object
- Don't free mbox structures on error. The completion handlers expect to do so.
- Clear host attention work items when going offline
- Fixed discovery issues in multi-initiator environments.

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 76f8bd5..d384c16 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -222,7 +222,7 @@ lpfc_issue_lip(struct Scsi_Host *host)
 	pmboxq->mb.mbxCommand = MBX_DOWN_LINK;
 	pmboxq->mb.mbxOwner = OWN_HOST;
 
-	mbxstatus = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
+	mbxstatus = lpfc_sli_issue_mbox_wait(phba, pmboxq, LPFC_MBOX_TMO * 2);
 
 	if ((mbxstatus == MBX_SUCCESS) && (pmboxq->mb.mbxStatus == 0)) {
 		memset((void *)pmboxq, 0, sizeof (LPFC_MBOXQ_t));
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 60f5cca..3567de61 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -1848,9 +1848,12 @@ static void
 lpfc_cmpl_els_acc(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
 		  struct lpfc_iocbq * rspiocb)
 {
+	IOCB_t *irsp;
 	struct lpfc_nodelist *ndlp;
 	LPFC_MBOXQ_t *mbox = NULL;
 
+	irsp = &rspiocb->iocb;
+
 	ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
 	if (cmdiocb->context_un.mbox)
 		mbox = cmdiocb->context_un.mbox;
@@ -1893,9 +1896,15 @@ lpfc_cmpl_els_acc(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
 			mempool_free( mbox, phba->mbox_mem_pool);
 		} else {
 			mempool_free( mbox, phba->mbox_mem_pool);
-			if (ndlp->nlp_flag & NLP_ACC_REGLOGIN) {
-				lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
-				ndlp = NULL;
+			/* Do not call NO_LIST for lpfc_els_abort'ed ELS cmds */
+			if (!((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+			      ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) ||
+			       (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) ||
+			       (irsp->un.ulpWord[4] == IOERR_SLI_DOWN)))) {
+				if (ndlp->nlp_flag & NLP_ACC_REGLOGIN) {
+					lpfc_nlp_list(phba, ndlp, NLP_NO_LIST);
+					ndlp = NULL;
+				}
 			}
 		}
 	}
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 1c3f268..b2f1552 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -1557,6 +1557,8 @@ lpfc_freenode(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
 			mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
 		}
 	}
+
+	spin_lock_irq(phba->host->host_lock);
 	list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
 		if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
 		   (ndlp == (struct lpfc_nodelist *) mb->context2)) {
@@ -1569,6 +1571,7 @@ lpfc_freenode(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
 			mempool_free(mb, phba->mbox_mem_pool);
 		}
 	}
+	spin_unlock_irq(phba->host->host_lock);
 
 	lpfc_els_abort(phba,ndlp,0);
 	spin_lock_irq(phba->host->host_lock);
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 16dc8c8..f6948ff 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1379,6 +1379,7 @@ lpfc_offline(struct lpfc_hba * phba)
 	/* stop all timers associated with this hba */
 	lpfc_stop_timer(phba);
 	phba->work_hba_events = 0;
+	phba->work_ha = 0;
 
 	lpfc_printf_log(phba,
 		       KERN_WARNING,
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index b38021a..20449a8 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -393,6 +393,20 @@ lpfc_rcv_plogi(struct lpfc_hba * phba,
 	mbox->context2  = ndlp;
 	ndlp->nlp_flag |= (NLP_ACC_REGLOGIN | NLP_RCV_PLOGI);
 
+	/*
+	 * If there is an outstanding PLOGI issued, abort it before
+	 * sending ACC rsp for received PLOGI. If pending plogi
+	 * is not canceled here, the plogi will be rejected by
+	 * remote port and will be retried. On a configuration with
+	 * single discovery thread, this will cause a huge delay in
+	 * discovery. Also this will cause multiple state machines
+	 * running in parallel for this node.
+	 */
+	if (ndlp->nlp_state == NLP_STE_PLOGI_ISSUE) {
+		/* software abort outstanding PLOGI */
+		lpfc_els_abort(phba, ndlp, 1);
+	}
+
 	lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, mbox, 0);
 	return 1;
 
@@ -1601,7 +1615,13 @@ lpfc_rcv_padisc_npr_node(struct lpfc_hba * phba,
 
 	lpfc_rcv_padisc(phba, ndlp, cmdiocb);
 
-	if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) {
+	/*
+	 * Do not start discovery if discovery is about to start
+	 * or discovery in progress for this node. Starting discovery
+	 * here will affect the counting of discovery threads.
+	 */
+	if ((!(ndlp->nlp_flag & NLP_DELAY_TMO)) &&
+		(ndlp->nlp_flag & NLP_NPR_2B_DISC)){
 		if (ndlp->nlp_flag & NLP_NPR_ADISC) {
 			ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
 			ndlp->nlp_state = NLP_STE_ADISC_ISSUE;
-- 
cgit v0.10.2


From a90f56847e8df9034c1c05d1157e1b0cd96987fb Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Thu, 17 Aug 2006 11:58:04 -0400
Subject: [SCSI] lpfc 8.1.9 : Stall eh handlers if resetting while rport
 blocked

Stall error handler if attempting resets/aborts while an rport is blocked.
This avoids device offline scenarios due to errors in the error handler.

Background:
  Although the transport is using the scsi_timed_out functionality to
  restart the timeout if the rport is blocked, if the timeout has already
  fired before the block occurs, the eh handler still runs and can take
  the device offline. Ultimately, this window cannot be resolved without
  significant work in the error handler thread. Christoph noted the first
  level of these issues when he noted the poor error response handling
  by the error thread.

  We found, under heavy load and error testing, that the time window from when
  scsi_times_out() adds the io to the queue to when the scsi_error_handler
  gets around to servicing it can be in the several-seconds range. In most
  cases, these test conditions are highly unusual, but possible.
  As a result, we're stalling the error handler in this race window so that
  we can avoid the device_offline transitions.

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 0811c82..a8816a8 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -21,6 +21,7 @@
 
 #include <linux/pci.h>
 #include <linux/interrupt.h>
+#include <linux/delay.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_device.h>
@@ -841,6 +842,21 @@ lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
 	return 0;
 }
 
+static void
+lpfc_block_error_handler(struct scsi_cmnd *cmnd)
+{
+	struct Scsi_Host *shost = cmnd->device->host;
+	struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
+
+	spin_lock_irq(shost->host_lock);
+	while (rport->port_state == FC_PORTSTATE_BLOCKED) {
+		spin_unlock_irq(shost->host_lock);
+		msleep(1000);
+		spin_lock_irq(shost->host_lock);
+	}
+	spin_unlock_irq(shost->host_lock);
+	return;
+}
 
 static int
 lpfc_abort_handler(struct scsi_cmnd *cmnd)
@@ -855,6 +871,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 	unsigned int loop_count = 0;
 	int ret = SUCCESS;
 
+	lpfc_block_error_handler(cmnd);
 	spin_lock_irq(shost->host_lock);
 
 	lpfc_cmd = (struct lpfc_scsi_buf *)cmnd->host_scribble;
@@ -957,6 +974,7 @@ lpfc_reset_lun_handler(struct scsi_cmnd *cmnd)
 	int ret = FAILED;
 	int cnt, loopcnt;
 
+	lpfc_block_error_handler(cmnd);
 	spin_lock_irq(shost->host_lock);
 	/*
 	 * If target is not in a MAPPED state, delay the reset until
@@ -1073,6 +1091,7 @@ lpfc_reset_bus_handler(struct scsi_cmnd *cmnd)
 	int cnt, loopcnt;
 	struct lpfc_scsi_buf * lpfc_cmd;
 
+	lpfc_block_error_handler(cmnd);
 	spin_lock_irq(shost->host_lock);
 
 	lpfc_cmd = lpfc_get_scsi_buf(phba);
-- 
cgit v0.10.2


From 48e2691f9c3bd6719e773f0b883449f4cef39585 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Thu, 17 Aug 2006 11:58:11 -0400
Subject: [SCSI] lpfc 8.1.9 : Change version number to 8.1.9

Change version number to 8.1.9

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index c7963f9..c7091ea 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -18,7 +18,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "8.1.8"
+#define LPFC_DRIVER_VERSION "8.1.9"
 
 #define LPFC_DRIVER_NAME "lpfc"
 
-- 
cgit v0.10.2


From 78bd4d484f81a611ef6ff02f909e576cb9aac7f2 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 21 Aug 2006 08:33:23 +0200
Subject: [PATCH] sys_ioprio_set: minor do_each_thread+break fix

From include/linux/sched.h:

         * Careful: do_each_thread/while_each_thread is a double loop so
         *          'break' will not work as expected - use goto instead.
         */

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/ioprio.c b/fs/ioprio.c
index 93aa571..3db3103 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -111,9 +111,9 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
 					continue;
 				ret = set_task_ioprio(p, ioprio);
 				if (ret)
-					break;
+					goto free_uid;
 			} while_each_thread(g, p);
-
+free_uid:
 			if (who)
 				free_uid(user);
 			break;
-- 
cgit v0.10.2


From 9f83e45eb54fc7198dc59fc63255341851ba4c48 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 21 Aug 2006 08:34:15 +0200
Subject: [PATCH] Fix current_io_context() vs set_task_ioprio() race

I know nothing about io scheduler, but I suspect set_task_ioprio() is not safe.

current_io_context() initializes "struct io_context", then sets ->io_context.
set_task_ioprio() running on another cpu may see the changes out of order, so
->set_ioprio(ioc) may use io_context which was not initialized properly.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 61d6b3c..ddd9253 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3628,6 +3628,8 @@ struct io_context *current_io_context(gfp_t gfp_flags)
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		ret->aic = NULL;
 		ret->cic_root.rb_node = NULL;
+		/* make sure set_task_ioprio() sees the settings above */
+		smp_wmb();
 		tsk->io_context = ret;
 	}
 
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 3db3103..0657831 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -44,6 +44,9 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
 	task->ioprio = ioprio;
 
 	ioc = task->io_context;
+	/* see wmb() in current_io_context() */
+	smp_read_barrier_depends();
+
 	if (ioc && ioc->set_ioprio)
 		ioc->set_ioprio(ioc, ioprio);
 
-- 
cgit v0.10.2


From e014ff8d4285b81f0de0719d8eee72bc50bfd4be Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 21 Aug 2006 10:02:50 +0200
Subject: [PATCH] uninline ioprio_best()

Saves 376 bytes (5 callers) for me.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/ioprio.c b/fs/ioprio.c
index 0657831..78b1dea 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -140,6 +140,29 @@ out:
 	return ret;
 }
 
+int ioprio_best(unsigned short aprio, unsigned short bprio)
+{
+	unsigned short aclass = IOPRIO_PRIO_CLASS(aprio);
+	unsigned short bclass = IOPRIO_PRIO_CLASS(bprio);
+
+	if (!ioprio_valid(aprio))
+		return bprio;
+	if (!ioprio_valid(bprio))
+		return aprio;
+
+	if (aclass == IOPRIO_CLASS_NONE)
+		aclass = IOPRIO_CLASS_BE;
+	if (bclass == IOPRIO_CLASS_NONE)
+		bclass = IOPRIO_CLASS_BE;
+
+	if (aclass == bclass)
+		return min(aprio, bprio);
+	if (aclass > bclass)
+		return bprio;
+	else
+		return aprio;
+}
+
 asmlinkage long sys_ioprio_get(int which, int who)
 {
 	struct task_struct *g, *p;
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 88d5961..8e2042b 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -59,27 +59,6 @@ static inline int task_nice_ioprio(struct task_struct *task)
 /*
  * For inheritance, return the highest of the two given priorities
  */
-static inline int ioprio_best(unsigned short aprio, unsigned short bprio)
-{
-	unsigned short aclass = IOPRIO_PRIO_CLASS(aprio);
-	unsigned short bclass = IOPRIO_PRIO_CLASS(bprio);
-
-	if (!ioprio_valid(aprio))
-		return bprio;
-	if (!ioprio_valid(bprio))
-		return aprio;
-
-	if (aclass == IOPRIO_CLASS_NONE)
-		aclass = IOPRIO_CLASS_BE;
-	if (bclass == IOPRIO_CLASS_NONE)
-		bclass = IOPRIO_CLASS_BE;
-
-	if (aclass == bclass)
-		return min(aprio, bprio);
-	if (aclass > bclass)
-		return bprio;
-	else
-		return aprio;
-}
+extern int ioprio_best(unsigned short aprio, unsigned short bprio);
 
 #endif
-- 
cgit v0.10.2


From be33c3a67bb717508ab1eab0f2fa570fabcbc4d2 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 21 Aug 2006 08:36:12 +0200
Subject: [PATCH] cfq_cic_link: fix usage of wrong cfq_io_context

Obviously, cfq_cic_link() shouldn't free a just allocated cfq_io_context?
The dead key is from __cic, so drop that.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index aae3123..3a3aee0 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1561,7 +1561,7 @@ restart:
 		/* ->key must be copied to avoid race with cfq_exit_queue() */
 		k = __cic->key;
 		if (unlikely(!k)) {
-			cfq_drop_dead_cic(ioc, cic);
+			cfq_drop_dead_cic(ioc, __cic);
 			goto restart;
 		}
 
-- 
cgit v0.10.2


From f1a58ecae527fc67c87ce4dcb9e73894f64aadfe Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Sun, 20 Aug 2006 17:56:38 +0900
Subject: [PATCH] ata_piix: fix ghost device probing by honoring PCS present
 bits

Move out PCS handling from piix_sata_prereset() into
piix_sata_present_mask() and use it from newly implemented
piix_sata_softreset().  Class codes for devices which are indicated to
be absent by PCS are cleared to ATA_DEV_NONE.  This fixes ghost device
problem reported on ICH6 and 7.

This patch moves PCS handling from prereset to softreset, which makes
two behavior changes.

* perform softreset even when PCS indicates no device
* PCS handling is repeated before retrying softresets due to reset
  failures.

Both behavior changes are intended and more consistent with how other
drivers behave.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>

diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c
index 5e8afc8..01b3530 100644
--- a/drivers/scsi/ata_piix.c
+++ b/drivers/scsi/ata_piix.c
@@ -531,27 +531,25 @@ static void piix_pata_error_handler(struct ata_port *ap)
 }
 
 /**
- *	piix_sata_prereset - prereset for SATA host controller
+ *	piix_sata_present_mask - determine present mask for SATA host controller
  *	@ap: Target port
  *
- *	Reads and configures SATA PCI device's PCI config register
- *	Port Configuration and Status (PCS) to determine port and
- *	device availability.  Return -ENODEV to skip reset if no
- *	device is present.
+ *	Reads SATA PCI device's PCI config register Port Configuration
+ *	and Status (PCS) to determine port and device availability.
  *
  *	LOCKING:
  *	None (inherited from caller).
  *
  *	RETURNS:
- *	0 if device is present, -ENODEV otherwise.
+ *	determined present_mask
  */
-static int piix_sata_prereset(struct ata_port *ap)
+static unsigned int piix_sata_present_mask(struct ata_port *ap)
 {
 	struct pci_dev *pdev = to_pci_dev(ap->host_set->dev);
 	struct piix_host_priv *hpriv = ap->host_set->private_data;
 	const unsigned int *map = hpriv->map;
 	int base = 2 * ap->hard_port_no;
-	unsigned int present = 0;
+	unsigned int present_mask = 0;
 	int port, i;
 	u16 pcs;
 
@@ -564,24 +562,52 @@ static int piix_sata_prereset(struct ata_port *ap)
 			continue;
 		if ((ap->flags & PIIX_FLAG_IGNORE_PCS) ||
 		    (pcs & 1 << (hpriv->map_db->present_shift + port)))
-			present = 1;
+			present_mask |= 1 << i;
 	}
 
-	DPRINTK("ata%u: LEAVE, pcs=0x%x present=0x%x\n",
-		ap->id, pcs, present);
+	DPRINTK("ata%u: LEAVE, pcs=0x%x present_mask=0x%x\n",
+		ap->id, pcs, present_mask);
 
-	if (!present) {
-		ata_port_printk(ap, KERN_INFO, "SATA port has no device.\n");
-		ap->eh_context.i.action &= ~ATA_EH_RESET_MASK;
-		return 0;
+	return present_mask;
+}
+
+/**
+ *	piix_sata_softreset - reset SATA host port via ATA SRST
+ *	@ap: port to reset
+ *	@classes: resulting classes of attached devices
+ *
+ *	Reset SATA host port via ATA SRST.  On controllers with
+ *	reliable PCS present bits, the bits are used to determine
+ *	device presence.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep)
+ *
+ *	RETURNS:
+ *	0 on success, -errno otherwise.
+ */
+static int piix_sata_softreset(struct ata_port *ap, unsigned int *classes)
+{
+	unsigned int present_mask;
+	int i, rc;
+
+	present_mask = piix_sata_present_mask(ap);
+
+	rc = ata_std_softreset(ap, classes);
+	if (rc)
+		return rc;
+
+	for (i = 0; i < ATA_MAX_DEVICES; i++) {
+		if (!(present_mask & (1 << i)))
+			classes[i] = ATA_DEV_NONE;
 	}
 
-	return ata_std_prereset(ap);
+	return 0;
 }
 
 static void piix_sata_error_handler(struct ata_port *ap)
 {
-	ata_bmdma_drive_eh(ap, piix_sata_prereset, ata_std_softreset, NULL,
+	ata_bmdma_drive_eh(ap, ata_std_prereset, piix_sata_softreset, NULL,
 			   ata_std_postreset);
 }
 
-- 
cgit v0.10.2


From c164a9ba0a8870c5c9d353f63085319931d69f23 Mon Sep 17 00:00:00 2001
From: Sridhar Samudrala <sri@us.ibm.com>
Date: Tue, 22 Aug 2006 11:50:39 -0700
Subject: Fix sctp privilege elevation (CVE-2006-3745)

sctp_make_abort_user() now takes the msg_len along with the msg
so that we don't have to recalculate the bytes in iovec.
It also uses memcpy_fromiovec() so that we don't go beyond the
length allocated.

It is good to have this fix even if verify_iovec() is fixed to
return error on overflow.

Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index a9663b4..92eae0e 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -404,19 +404,6 @@ static inline int sctp_list_single_entry(struct list_head *head)
 	return ((head->next != head) && (head->next == head->prev));
 }
 
-/* Calculate the size (in bytes) occupied by the data of an iovec.  */
-static inline size_t get_user_iov_size(struct iovec *iov, int iovlen)
-{
-	size_t retval = 0;
-
-	for (; iovlen > 0; --iovlen) {
-		retval += iov->iov_len;
-		iov++;
-	}
-
-	return retval;
-}
-
 /* Generate a random jitter in the range of -50% ~ +50% of input RTO. */
 static inline __s32 sctp_jitter(__u32 rto)
 {
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 1eac3d0..de313de 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -221,8 +221,7 @@ struct sctp_chunk *sctp_make_abort_no_data(const struct sctp_association *,
 				      const struct sctp_chunk *,
 				      __u32 tsn);
 struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *,
-				   const struct sctp_chunk *,
-				   const struct msghdr *);
+					const struct msghdr *, size_t msg_len);
 struct sctp_chunk *sctp_make_abort_violation(const struct sctp_association *,
 				   const struct sctp_chunk *,
 				   const __u8 *,
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 4f11f58..17b5092 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -806,38 +806,26 @@ no_mem:
 
 /* Helper to create ABORT with a SCTP_ERROR_USER_ABORT error.  */
 struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc,
-				   const struct sctp_chunk *chunk,
-				   const struct msghdr *msg)
+					const struct msghdr *msg,
+					size_t paylen)
 {
 	struct sctp_chunk *retval;
-	void *payload = NULL, *payoff;
-	size_t paylen = 0;
-	struct iovec *iov = NULL;
-	int iovlen = 0;
-
-	if (msg) {
-		iov = msg->msg_iov;
-		iovlen = msg->msg_iovlen;
-		paylen = get_user_iov_size(iov, iovlen);
-	}
+	void *payload = NULL;
+	int err;
 
-	retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen);
+	retval = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t) + paylen);
 	if (!retval)
 		goto err_chunk;
 
 	if (paylen) {
 		/* Put the msg_iov together into payload.  */
-		payload = kmalloc(paylen, GFP_ATOMIC);
+		payload = kmalloc(paylen, GFP_KERNEL);
 		if (!payload)
 			goto err_payload;
-		payoff = payload;
 
-		for (; iovlen > 0; --iovlen) {
-			if (copy_from_user(payoff, iov->iov_base,iov->iov_len))
-				goto err_copy;
-			payoff += iov->iov_len;
-			iov++;
-		}
+		err = memcpy_fromiovec(payload, msg->msg_iov, paylen);
+		if (err < 0)
+			goto err_copy;
 	}
 
 	sctp_init_cause(retval, SCTP_ERROR_USER_ABORT, payload, paylen);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index ead3f1b..5b5ae79 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -4031,18 +4031,12 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
 	 * from its upper layer, but retransmits data to the far end
 	 * if necessary to fill gaps.
 	 */
-	struct msghdr *msg = arg;
-	struct sctp_chunk *abort;
+	struct sctp_chunk *abort = arg;
 	sctp_disposition_t retval;
 
 	retval = SCTP_DISPOSITION_CONSUME;
 
-	/* Generate ABORT chunk to send the peer.  */
-	abort = sctp_make_abort_user(asoc, NULL, msg);
-	if (!abort)
-		retval = SCTP_DISPOSITION_NOMEM;
-	else
-		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
 
 	/* Even if we can't send the ABORT due to low memory delete the
 	 * TCB.  This is a departure from our typical NOMEM handling.
@@ -4166,8 +4160,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
-	struct msghdr *msg = arg;
-	struct sctp_chunk *abort;
+	struct sctp_chunk *abort = arg;
 	sctp_disposition_t retval;
 
 	/* Stop T1-init timer */
@@ -4175,12 +4168,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
 	retval = SCTP_DISPOSITION_CONSUME;
 
-	/* Generate ABORT chunk to send the peer */
-	abort = sctp_make_abort_user(asoc, NULL, msg);
-	if (!abort)
-		retval = SCTP_DISPOSITION_NOMEM;
-	else
-		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
 
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_CLOSED));
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 54722e6..fde3f55 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1520,8 +1520,16 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 			goto out_unlock;
 		}
 		if (sinfo_flags & SCTP_ABORT) {
+			struct sctp_chunk *chunk;
+
+			chunk = sctp_make_abort_user(asoc, msg, msg_len);
+			if (!chunk) {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+
 			SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc);
-			sctp_primitive_ABORT(asoc, msg);
+			sctp_primitive_ABORT(asoc, chunk);
 			err = 0;
 			goto out_unlock;
 		}
-- 
cgit v0.10.2


From 00a2b0f6dd2372842df73de72d51621b539fea44 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 15 Aug 2006 13:56:26 +0200
Subject: Fix possible UDF deadlock and memory corruption (CVE-2006-4145)

UDF code is not really ready to handle extents larger than 1GB. This is
the easy way to forbid creating those.

Also, the truncation code did not account for the case where there are no
extents in the file and we are extending the file.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/fs/udf/super.c b/fs/udf/super.c
index 7de172e..fcce1a2 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1659,7 +1659,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 		iput(inode);
 		goto error_out;
 	}
-	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_maxbytes = 1<<30;
 	return 0;
 
 error_out:
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index e1b0e8c..0abd66c 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -239,37 +239,51 @@ void udf_truncate_extents(struct inode * inode)
 	{
 		if (offset)
 		{
-			extoffset -= adsize;
-			etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1);
-			if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))
-			{
-				extoffset -= adsize;
-				elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset);
-				udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0);
+			/*
+			 *  OK, there is not extent covering inode->i_size and
+			 *  no extent above inode->i_size => truncate is
+			 *  extending the file by 'offset'.
+			 */
+			if ((!bh && extoffset == udf_file_entry_alloc_offset(inode)) ||
+			    (bh && extoffset == sizeof(struct allocExtDesc))) {
+				/* File has no extents at all! */
+				memset(&eloc, 0x00, sizeof(kernel_lb_addr));
+				elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
+				udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
 			}
-			else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30))
-			{
-				kernel_lb_addr neloc = { 0, 0 };
+			else {
 				extoffset -= adsize;
-				nelen = EXT_NOT_RECORDED_NOT_ALLOCATED |
-					((elen + offset + inode->i_sb->s_blocksize - 1) &
-					~(inode->i_sb->s_blocksize - 1));
-				udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1);
-				udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1);
-			}
-			else
-			{
-				if (elen & (inode->i_sb->s_blocksize - 1))
+				etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1);
+				if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))
+				{
+					extoffset -= adsize;
+					elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset);
+					udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0);
+				}
+				else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30))
 				{
+					kernel_lb_addr neloc = { 0, 0 };
 					extoffset -= adsize;
-					elen = EXT_RECORDED_ALLOCATED |
-						((elen + inode->i_sb->s_blocksize - 1) &
+					nelen = EXT_NOT_RECORDED_NOT_ALLOCATED |
+						((elen + offset + inode->i_sb->s_blocksize - 1) &
 						~(inode->i_sb->s_blocksize - 1));
-					udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1);
+					udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1);
+					udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1);
+				}
+				else
+				{
+					if (elen & (inode->i_sb->s_blocksize - 1))
+					{
+						extoffset -= adsize;
+						elen = EXT_RECORDED_ALLOCATED |
+							((elen + inode->i_sb->s_blocksize - 1) &
+							~(inode->i_sb->s_blocksize - 1));
+						udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1);
+					}
+					memset(&eloc, 0x00, sizeof(kernel_lb_addr));
+					elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
+					udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
 				}
-				memset(&eloc, 0x00, sizeof(kernel_lb_addr));
-				elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
-				udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
 			}
 		}
 	}
-- 
cgit v0.10.2


From 2d8f613160ed303f5fe310f21bba334a4b074fa5 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Tue, 22 Aug 2006 21:22:13 +0400
Subject: elv_unregister: fix possible crash on module unload

An exiting task, or a process which didn't do I/O yet, has no io context,
so elv_unregister() should check that it is not NULL.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/block/elevator.c b/block/elevator.c
index bc7baee..9b72dc7 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -765,7 +765,8 @@ void elv_unregister(struct elevator_type *e)
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
 			task_lock(p);
-			e->ops.trim(p->io_context);
+			if (p->io_context)
+				e->ops.trim(p->io_context);
 			task_unlock(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
-- 
cgit v0.10.2


From e0b7cde9975e17a61b4511c7822803dfb7210011 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Aug 2006 15:31:08 -0700
Subject: [NETFILTER]: arp_tables: fix table locking in arpt_do_table

table->private might change because of ruleset changes, don't use it
without holding the lock.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index df4854c..8d1d7a6 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -236,7 +236,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	struct arpt_entry *e, *back;
 	const char *indev, *outdev;
 	void *table_base;
-	struct xt_table_info *private = table->private;
+	struct xt_table_info *private;
 
 	/* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
 	if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
@@ -248,6 +248,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	outdev = out ? out->name : nulldevname;
 
 	read_lock_bh(&table->lock);
+	private = table->private;
 	table_base = (void *)private->entries[smp_processor_id()];
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
-- 
cgit v0.10.2


From 316c1592bea94ead75301cb764523661fbbcc1ca Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 22 Aug 2006 00:06:11 -0700
Subject: [TCP]: Limit window scaling if window is clamped.

This small change allows for easy per-route workarounds for broken hosts or
middleboxes that are not compliant with TCP standards for window scaling.
Rather than having to turn off window scaling globally, this patch allows
reducing or disabling window scaling if a window clamp is present.

Example: Mark Lord reported a problem with 2.6.17 kernel being unable to
access http://www.everymac.com

# ip route add 216.145.246.23/32 via 10.8.0.1 window 65535

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 507adef..b4f3ffe 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -201,6 +201,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
 		 * See RFC1323 for an explanation of the limit to 14 
 		 */
 		space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
+		space = min_t(u32, space, *window_clamp);
 		while (space > 65535 && (*rcv_wscale) < 14) {
 			space >>= 1;
 			(*rcv_wscale)++;
-- 
cgit v0.10.2


From 3ffaa8c7c0f884171a273cd2145b8fbbf233ba22 Mon Sep 17 00:00:00 2001
From: Michael Rash <mbr@cipherdyne.org>
Date: Tue, 22 Aug 2006 00:45:22 -0700
Subject: [TEXTSEARCH]: Fix Boyer Moore initialization bug

The pattern is set after trying to compute the prefix table, which tries
to use it. Initialize it before calling compute_prefix_tbl, make
compute_prefix_tbl consistently use only the data from struct ts_bm
and remove the now unnecessary arguments.

Signed-off-by: Michael Rash <mbr@cipherdyne.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/lib/ts_bm.c b/lib/ts_bm.c
index 0110e44..d90822c 100644
--- a/lib/ts_bm.c
+++ b/lib/ts_bm.c
@@ -111,15 +111,14 @@ static int subpattern(u8 *pattern, int i, int j, int g)
 	return ret;
 }
 
-static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern,
-			       unsigned int len)
+static void compute_prefix_tbl(struct ts_bm *bm)
 {
 	int i, j, g;
 
 	for (i = 0; i < ASIZE; i++)
-		bm->bad_shift[i] = len;
-	for (i = 0; i < len - 1; i++)
-		bm->bad_shift[pattern[i]] = len - 1 - i;
+		bm->bad_shift[i] = bm->patlen;
+	for (i = 0; i < bm->patlen - 1; i++)
+		bm->bad_shift[bm->pattern[i]] = bm->patlen - 1 - i;
 
 	/* Compute the good shift array, used to match reocurrences 
 	 * of a subpattern */
@@ -150,8 +149,8 @@ static struct ts_config *bm_init(const void *pattern, unsigned int len,
 	bm = ts_config_priv(conf);
 	bm->patlen = len;
 	bm->pattern = (u8 *) bm->good_shift + prefix_tbl_len;
-	compute_prefix_tbl(bm, pattern, len);
 	memcpy(bm->pattern, pattern, len);
+	compute_prefix_tbl(bm);
 
 	return conf;
 }
-- 
cgit v0.10.2


From c46f4774228e4dbf716b713e91ad20db50f629ef Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Mon, 21 Aug 2006 15:27:16 -0700
Subject: [SPARC64]: Fix pfn_pte() build failure.

The "%uhi" needs to be "%%uhi" because we want a real
"%" character in the assembler here, instead of an
assembler variable expansion.

Apparently older GCCs were more liberal and interpreted
this %-letter as a literal "%" for whatever reason.

Based upon a build failure report from Meelis Roos.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index 1ba19eb..ebfe395 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -234,7 +234,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
 	sz_bits = 0UL;
 	if (_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL) {
 		__asm__ __volatile__(
-		"\n661:	sethi		%uhi(%1), %0\n"
+		"\n661:	sethi		%%uhi(%1), %0\n"
 		"	sllx		%0, 32, %0\n"
 		"	.section	.sun4v_2insn_patch, \"ax\"\n"
 		"	.word		661b\n"
-- 
cgit v0.10.2


From 8ea371fb6df5a6e8056265e0089fd578e87797fc Mon Sep 17 00:00:00 2001
From: Florin Malita <fmalita@gmail.com>
Date: Wed, 23 Aug 2006 00:45:33 -0400
Subject: Input: atkbd - fix overrun in atkbd_set_repeat_rate()

This was introduced in commit 3d0f0fa0cb554541e10cb8cb84104e4b10828468:
bounds checking is performed against period[32] while indexing delay[4].

Spotted by Coverity, CID 1376.

Signed-off-by: Florin Malita <fmalita@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>

diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 6bfa0cf..a86afd0 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -498,7 +498,7 @@ static int atkbd_set_repeat_rate(struct atkbd *atkbd)
 		i++;
 	dev->rep[REP_PERIOD] = period[i];
 
-	while (j < ARRAY_SIZE(period) - 1 && delay[j] < dev->rep[REP_DELAY])
+	while (j < ARRAY_SIZE(delay) - 1 && delay[j] < dev->rep[REP_DELAY])
 		j++;
 	dev->rep[REP_DELAY] = delay[j];
 
-- 
cgit v0.10.2


From 72a623be00fa3d77724c1b0cac07c1bac60e70a5 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@insightbb.com>
Date: Wed, 23 Aug 2006 00:47:39 -0400
Subject: Input: wistron - fix crash due to referencing __initdata

Remove __initdata markings from keymaps as they are used during
normal driver operations.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>

diff --git a/drivers/input/misc/wistron_btns.c b/drivers/input/misc/wistron_btns.c
index a8efc1a..de0f46d 100644
--- a/drivers/input/misc/wistron_btns.c
+++ b/drivers/input/misc/wistron_btns.c
@@ -259,11 +259,11 @@ static int __init dmi_matched(struct dmi_system_id *dmi)
 	return 1;
 }
 
-static struct key_entry keymap_empty[] __initdata = {
+static struct key_entry keymap_empty[] = {
 	{ KE_END, 0 }
 };
 
-static struct key_entry keymap_fs_amilo_pro_v2000[] __initdata = {
+static struct key_entry keymap_fs_amilo_pro_v2000[] = {
 	{ KE_KEY,  0x01, KEY_HELP },
 	{ KE_KEY,  0x11, KEY_PROG1 },
 	{ KE_KEY,  0x12, KEY_PROG2 },
@@ -273,7 +273,7 @@ static struct key_entry keymap_fs_amilo_pro_v2000[] __initdata = {
 	{ KE_END,  0 }
 };
 
-static struct key_entry keymap_fujitsu_n3510[] __initdata = {
+static struct key_entry keymap_fujitsu_n3510[] = {
 	{ KE_KEY, 0x11, KEY_PROG1 },
 	{ KE_KEY, 0x12, KEY_PROG2 },
 	{ KE_KEY, 0x36, KEY_WWW },
@@ -285,7 +285,7 @@ static struct key_entry keymap_fujitsu_n3510[] __initdata = {
 	{ KE_END, 0 }
 };
 
-static struct key_entry keymap_wistron_ms2111[] __initdata = {
+static struct key_entry keymap_wistron_ms2111[] = {
 	{ KE_KEY,  0x11, KEY_PROG1 },
 	{ KE_KEY,  0x12, KEY_PROG2 },
 	{ KE_KEY,  0x13, KEY_PROG3 },
@@ -294,7 +294,7 @@ static struct key_entry keymap_wistron_ms2111[] __initdata = {
 	{ KE_END,  0 }
 };
 
-static struct key_entry keymap_wistron_ms2141[] __initdata = {
+static struct key_entry keymap_wistron_ms2141[] = {
 	{ KE_KEY,  0x11, KEY_PROG1 },
 	{ KE_KEY,  0x12, KEY_PROG2 },
 	{ KE_WIFI, 0x30, 0 },
@@ -307,7 +307,7 @@ static struct key_entry keymap_wistron_ms2141[] __initdata = {
 	{ KE_END,  0 }
 };
 
-static struct key_entry keymap_acer_aspire_1500[] __initdata = {
+static struct key_entry keymap_acer_aspire_1500[] = {
 	{ KE_KEY, 0x11, KEY_PROG1 },
 	{ KE_KEY, 0x12, KEY_PROG2 },
 	{ KE_WIFI, 0x30, 0 },
@@ -317,7 +317,7 @@ static struct key_entry keymap_acer_aspire_1500[] __initdata = {
 	{ KE_END, 0 }
 };
 
-static struct key_entry keymap_acer_travelmate_240[] __initdata = {
+static struct key_entry keymap_acer_travelmate_240[] = {
 	{ KE_KEY, 0x31, KEY_MAIL },
 	{ KE_KEY, 0x36, KEY_WWW },
 	{ KE_KEY, 0x11, KEY_PROG1 },
@@ -327,7 +327,7 @@ static struct key_entry keymap_acer_travelmate_240[] __initdata = {
 	{ KE_END, 0 }
 };
 
-static struct key_entry keymap_aopen_1559as[] __initdata = {
+static struct key_entry keymap_aopen_1559as[] = {
 	{ KE_KEY,  0x01, KEY_HELP },
 	{ KE_KEY,  0x06, KEY_PROG3 },
 	{ KE_KEY,  0x11, KEY_PROG1 },
-- 
cgit v0.10.2


From 90414be9523208f0b667fd58c22e26b8db0594de Mon Sep 17 00:00:00 2001
From: Pozsar Balazs <pozsy@uhulinux.hu>
Date: Wed, 23 Aug 2006 00:48:03 -0400
Subject: Input: psmouse - fix Intellimouse 4.0 initialization

Revert the superfluous initialization that causes some mice to become jumpy.

Signed-off-by: Pozsar Balazs <pozsy@uhulinux.hu>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>

diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 8bc9f51..343afa3 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -485,13 +485,6 @@ static int im_explorer_detect(struct psmouse *psmouse, int set_properties)
 	param[0] =  40;
 	ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE);
 
-	param[0] = 200;
-	ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE);
-	param[0] = 200;
-	ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE);
-	param[0] =  60;
-	ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE);
-
 	if (set_properties) {
 		set_bit(BTN_MIDDLE, psmouse->dev->keybit);
 		set_bit(REL_WHEEL, psmouse->dev->relbit);
-- 
cgit v0.10.2


From c712a9de94a5df5bc0087c14ad0b1aac2c147991 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 23 Aug 2006 00:48:33 -0400
Subject: Input: remove dead URLs from Documentation/input/joystick.txt

Closes #2804.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>

diff --git a/Documentation/input/joystick.txt b/Documentation/input/joystick.txt
index d53b857..841c353 100644
--- a/Documentation/input/joystick.txt
+++ b/Documentation/input/joystick.txt
@@ -39,7 +39,6 @@ them. Bug reports and success stories are also welcome.
 
   The input project website is at:
 
-	http://www.suse.cz/development/input/
 	http://atrey.karlin.mff.cuni.cz/~vojtech/input/
 
   There is also a mailing list for the driver at:
-- 
cgit v0.10.2


From 4c86cd9c59428a40233fb707f5de18caa5ab5cb7 Mon Sep 17 00:00:00 2001
From: Andy Fleming <afleming@freescale.com>
Date: Fri, 18 Aug 2006 18:03:08 -0500
Subject: [POWERPC] Fix interrupts on 8540 ADS board

* Fixed 8540 ADS support for the new irq layer
* Fixed 8540 ADS support for mapping PCI interrupts
* Updated 8540 ADS to use device tree for interrupt assignment
  and sense values

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
index 06a4976..9d2acfb 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
@@ -37,79 +37,7 @@ unsigned long isa_io_base = 0;
 unsigned long isa_mem_base = 0;
 #endif
 
-/*
- * Internal interrupts are all Level Sensitive, and Positive Polarity
- *
- * Note:  Likely, this table and the following function should be
- *        obtained and derived from the OF Device Tree.
- */
-static u_char mpc85xx_ads_openpic_initsenses[] __initdata = {
-	MPC85XX_INTERNAL_IRQ_SENSES,
-	0x0,			/* External  0: */
-#if defined(CONFIG_PCI)
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* Ext 1: PCI slot 0 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* Ext 2: PCI slot 1 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* Ext 3: PCI slot 2 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* Ext 4: PCI slot 3 */
-#else
-	0x0,			/* External  1: */
-	0x0,			/* External  2: */
-	0x0,			/* External  3: */
-	0x0,			/* External  4: */
-#endif
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* External 5: PHY */
-	0x0,			/* External  6: */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* External 7: PHY */
-	0x0,			/* External  8: */
-	0x0,			/* External  9: */
-	0x0,			/* External 10: */
-	0x0,			/* External 11: */
-};
-
 #ifdef CONFIG_PCI
-/*
- * interrupt routing
- */
-
-int
-mpc85xx_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin)
-{
-	static char pci_irq_table[][4] =
-	    /*
-	     * This is little evil, but works around the fact
-	     * that revA boards have IDSEL starting at 18
-	     * and others boards (older) start at 12
-	     *
-	     *      PCI IDSEL/INTPIN->INTLINE
-	     *       A      B      C      D
-	     */
-	{
-		{PIRQA, PIRQB, PIRQC, PIRQD},	/* IDSEL 2 */
-		{PIRQD, PIRQA, PIRQB, PIRQC},
-		{PIRQC, PIRQD, PIRQA, PIRQB},
-		{PIRQB, PIRQC, PIRQD, PIRQA},	/* IDSEL 5 */
-		{0, 0, 0, 0},	/* -- */
-		{0, 0, 0, 0},	/* -- */
-		{0, 0, 0, 0},	/* -- */
-		{0, 0, 0, 0},	/* -- */
-		{0, 0, 0, 0},	/* -- */
-		{0, 0, 0, 0},	/* -- */
-		{PIRQA, PIRQB, PIRQC, PIRQD},	/* IDSEL 12 */
-		{PIRQD, PIRQA, PIRQB, PIRQC},
-		{PIRQC, PIRQD, PIRQA, PIRQB},
-		{PIRQB, PIRQC, PIRQD, PIRQA},	/* IDSEL 15 */
-		{0, 0, 0, 0},	/* -- */
-		{0, 0, 0, 0},	/* -- */
-		{PIRQA, PIRQB, PIRQC, PIRQD},	/* IDSEL 18 */
-		{PIRQD, PIRQA, PIRQB, PIRQC},
-		{PIRQC, PIRQD, PIRQA, PIRQB},
-		{PIRQB, PIRQC, PIRQD, PIRQA},	/* IDSEL 21 */
-	};
-
-	const long min_idsel = 2, max_idsel = 21, irqs_per_slot = 4;
-	return PCI_IRQ_TABLE_LOOKUP;
-}
-
 int
 mpc85xx_exclude_device(u_char bus, u_char devfn)
 {
@@ -119,44 +47,63 @@ mpc85xx_exclude_device(u_char bus, u_char devfn)
 		return PCIBIOS_SUCCESSFUL;
 }
 
+void __init
+mpc85xx_pcibios_fixup(void)
+{
+	struct pci_dev *dev = NULL;
+
+	for_each_pci_dev(dev)
+		pci_read_irq_line(dev);
+}
 #endif /* CONFIG_PCI */
 
 
 void __init mpc85xx_ads_pic_init(void)
 {
-	struct mpic *mpic1;
-	phys_addr_t OpenPIC_PAddr;
-
-	/* Determine the Physical Address of the OpenPIC regs */
-	OpenPIC_PAddr = get_immrbase() + MPC85xx_OPENPIC_OFFSET;
-
-	mpic1 = mpic_alloc(OpenPIC_PAddr,
-			   MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN,
-			   4, MPC85xx_OPENPIC_IRQ_OFFSET, 0, 250,
-			   mpc85xx_ads_openpic_initsenses,
-			   sizeof(mpc85xx_ads_openpic_initsenses),
-			   " OpenPIC  ");
-	BUG_ON(mpic1 == NULL);
-	mpic_assign_isu(mpic1, 0, OpenPIC_PAddr + 0x10200);
-	mpic_assign_isu(mpic1, 1, OpenPIC_PAddr + 0x10280);
-	mpic_assign_isu(mpic1, 2, OpenPIC_PAddr + 0x10300);
-	mpic_assign_isu(mpic1, 3, OpenPIC_PAddr + 0x10380);
-	mpic_assign_isu(mpic1, 4, OpenPIC_PAddr + 0x10400);
-	mpic_assign_isu(mpic1, 5, OpenPIC_PAddr + 0x10480);
-	mpic_assign_isu(mpic1, 6, OpenPIC_PAddr + 0x10500);
-	mpic_assign_isu(mpic1, 7, OpenPIC_PAddr + 0x10580);
-
-	/* dummy mappings to get to 48 */
-	mpic_assign_isu(mpic1, 8, OpenPIC_PAddr + 0x10600);
-	mpic_assign_isu(mpic1, 9, OpenPIC_PAddr + 0x10680);
-	mpic_assign_isu(mpic1, 10, OpenPIC_PAddr + 0x10700);
-	mpic_assign_isu(mpic1, 11, OpenPIC_PAddr + 0x10780);
-
-	/* External ints */
-	mpic_assign_isu(mpic1, 12, OpenPIC_PAddr + 0x10000);
-	mpic_assign_isu(mpic1, 13, OpenPIC_PAddr + 0x10080);
-	mpic_assign_isu(mpic1, 14, OpenPIC_PAddr + 0x10100);
-	mpic_init(mpic1);
+	struct mpic *mpic;
+	struct resource r;
+	struct device_node *np = NULL;
+
+	np = of_find_node_by_type(np, "open-pic");
+
+	if (np == NULL) {
+		printk(KERN_ERR "Could not find open-pic node\n");
+		return;
+	}
+
+	if(of_address_to_resource(np, 0, &r)) {
+		printk(KERN_ERR "Could not map mpic register space\n");
+		of_node_put(np);
+		return;
+	}
+
+	mpic = mpic_alloc(np, r.start,
+			MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN,
+			4, 0, " OpenPIC  ");
+	BUG_ON(mpic == NULL);
+	of_node_put(np);
+
+	mpic_assign_isu(mpic, 0, r.start + 0x10200);
+	mpic_assign_isu(mpic, 1, r.start + 0x10280);
+	mpic_assign_isu(mpic, 2, r.start + 0x10300);
+	mpic_assign_isu(mpic, 3, r.start + 0x10380);
+	mpic_assign_isu(mpic, 4, r.start + 0x10400);
+	mpic_assign_isu(mpic, 5, r.start + 0x10480);
+	mpic_assign_isu(mpic, 6, r.start + 0x10500);
+	mpic_assign_isu(mpic, 7, r.start + 0x10580);
+
+	/* Unused on this platform (leave room for 8548) */
+	mpic_assign_isu(mpic, 8, r.start + 0x10600);
+	mpic_assign_isu(mpic, 9, r.start + 0x10680);
+	mpic_assign_isu(mpic, 10, r.start + 0x10700);
+	mpic_assign_isu(mpic, 11, r.start + 0x10780);
+
+	/* External Interrupts */
+	mpic_assign_isu(mpic, 12, r.start + 0x10000);
+	mpic_assign_isu(mpic, 13, r.start + 0x10080);
+	mpic_assign_isu(mpic, 14, r.start + 0x10100);
+
+	mpic_init(mpic);
 }
 
 /*
@@ -165,7 +112,9 @@ void __init mpc85xx_ads_pic_init(void)
 static void __init mpc85xx_ads_setup_arch(void)
 {
 	struct device_node *cpu;
+#ifdef CONFIG_PCI
 	struct device_node *np;
+#endif
 
 	if (ppc_md.progress)
 		ppc_md.progress("mpc85xx_ads_setup_arch()", 0);
@@ -186,8 +135,7 @@ static void __init mpc85xx_ads_setup_arch(void)
 	for (np = NULL; (np = of_find_node_by_type(np, "pci")) != NULL;)
 		add_bridge(np);
 
-	ppc_md.pci_swizzle = common_swizzle;
-	ppc_md.pci_map_irq = mpc85xx_map_irq;
+	ppc_md.pcibios_fixup = mpc85xx_pcibios_fixup;
 	ppc_md.pci_exclude_device = mpc85xx_exclude_device;
 #endif
 
-- 
cgit v0.10.2


From ddd64159eb0d090766eee79b191a974ffdd83a42 Mon Sep 17 00:00:00 2001
From: Andy Fleming <afleming@freescale.com>
Date: Thu, 17 Aug 2006 20:24:48 -0500
Subject: [POWERPC] Fix CDS IRQ handling and PCI code

* Fix IRQ support in the 85xx CDS boards so it uses the new
  generic stuff
* Fix PCI IRQ mapping to use the device tree
* Disabled i8259 support to allow the CDS to boot.  This will be
  fixed soon, but the current code doesn't even compile, so this
  is a vast improvement

Signed-off-by: Andy Fleming <afleming@freescale.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index 454fc53..c3268d9 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -14,7 +14,6 @@ config MPC8540_ADS
 config MPC85xx_CDS
 	bool "Freescale MPC85xx CDS"
 	select DEFAULT_UIMAGE
-	select PPC_I8259 if PCI
 	help
 	  This option enables support for the MPC85xx CDS board
 
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index 18e6e11..1d357d3 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -57,94 +57,8 @@ unsigned long isa_mem_base = 0;
 static int cds_pci_slot = 2;
 static volatile u8 *cadmus;
 
-/*
- * Internal interrupts are all Level Sensitive, and Positive Polarity
- *
- * Note:  Likely, this table and the following function should be
- *        obtained and derived from the OF Device Tree.
- */
-static u_char mpc85xx_cds_openpic_initsenses[] __initdata = {
-	MPC85XX_INTERNAL_IRQ_SENSES,
-#if defined(CONFIG_PCI)
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Ext 0: PCI slot 0 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* Ext 1: PCI slot 1 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* Ext 2: PCI slot 2 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* Ext 3: PCI slot 3 */
-#else
-	0x0,				/* External  0: */
-	0x0,				/* External  1: */
-	0x0,				/* External  2: */
-	0x0,				/* External  3: */
-#endif
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* External 5: PHY */
-	0x0,				/* External  6: */
-	0x0,				/* External  7: */
-	0x0,				/* External  8: */
-	0x0,				/* External  9: */
-	0x0,				/* External 10: */
-#ifdef CONFIG_PCI
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),    /* Ext 11: PCI2 slot 0 */
-#else
-	0x0,				/* External 11: */
-#endif
-};
-
 
 #ifdef CONFIG_PCI
-/*
- * interrupt routing
- */
-int
-mpc85xx_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin)
-{
-	struct pci_controller *hose = pci_bus_to_hose(dev->bus->number);
-
-	if (!hose->index)
-	{
-		/* Handle PCI1 interrupts */
-		char pci_irq_table[][4] =
-			/*
-			 *      PCI IDSEL/INTPIN->INTLINE
-			 *        A      B      C      D
-			 */
-
-			/* Note IRQ assignment for slots is based on which slot the elysium is
-			 * in -- in this setup elysium is in slot #2 (this PIRQA as first
-			 * interrupt on slot */
-		{
-			{ 0, 1, 2, 3 }, /* 16 - PMC */
-			{ 0, 1, 2, 3 }, /* 17 P2P (Tsi320) */
-			{ 0, 1, 2, 3 }, /* 18 - Slot 1 */
-			{ 1, 2, 3, 0 }, /* 19 - Slot 2 */
-			{ 2, 3, 0, 1 }, /* 20 - Slot 3 */
-			{ 3, 0, 1, 2 }, /* 21 - Slot 4 */
-		};
-
-		const long min_idsel = 16, max_idsel = 21, irqs_per_slot = 4;
-		int i, j;
-
-		for (i = 0; i < 6; i++)
-			for (j = 0; j < 4; j++)
-				pci_irq_table[i][j] =
-					((pci_irq_table[i][j] + 5 -
-					  cds_pci_slot) & 0x3) + PIRQ0A;
-
-		return PCI_IRQ_TABLE_LOOKUP;
-	} else {
-		/* Handle PCI2 interrupts (if we have one) */
-		char pci_irq_table[][4] =
-		{
-			/*
-			 * We only have one slot and one interrupt
-			 * going to PIRQA - PIRQD */
-			{ PIRQ1A, PIRQ1A, PIRQ1A, PIRQ1A }, /* 21 - slot 0 */
-		};
-
-		const long min_idsel = 21, max_idsel = 21, irqs_per_slot = 4;
-
-		return PCI_IRQ_TABLE_LOOKUP;
-	}
-}
 
 #define ARCADIA_HOST_BRIDGE_IDSEL	17
 #define ARCADIA_2ND_BRIDGE_IDSEL	3
@@ -210,50 +124,104 @@ mpc85xx_cds_pcibios_fixup(void)
 		pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11);
 		pci_dev_put(dev);
 	}
+
+	/* Now map all the PCI irqs */
+	dev = NULL;
+	for_each_pci_dev(dev)
+		pci_read_irq_line(dev);
+}
+
+#ifdef CONFIG_PPC_I8259
+#warning The i8259 PIC support is currently broken
+static void mpc85xx_8259_cascade(unsigned int irq, struct
+		irq_desc *desc, struct pt_regs *regs)
+{
+	unsigned int cascade_irq = i8259_irq(regs);
+
+	if (cascade_irq != NO_IRQ)
+		generic_handle_irq(cascade_irq, regs);
+
+	desc->chip->eoi(irq);
 }
+#endif /* PPC_I8259 */
 #endif /* CONFIG_PCI */
 
 void __init mpc85xx_cds_pic_init(void)
 {
-	struct mpic *mpic1;
-	phys_addr_t OpenPIC_PAddr;
+	struct mpic *mpic;
+	struct resource r;
+	struct device_node *np = NULL;
+	struct device_node *cascade_node = NULL;
+	int cascade_irq;
 
-	/* Determine the Physical Address of the OpenPIC regs */
-	OpenPIC_PAddr = get_immrbase() + MPC85xx_OPENPIC_OFFSET;
+	np = of_find_node_by_type(np, "open-pic");
+
+	if (np == NULL) {
+		printk(KERN_ERR "Could not find open-pic node\n");
+		return;
+	}
 
-	mpic1 = mpic_alloc(OpenPIC_PAddr,
+	if (of_address_to_resource(np, 0, &r)) {
+		printk(KERN_ERR "Failed to map mpic register space\n");
+		of_node_put(np);
+		return;
+	}
+
+	mpic = mpic_alloc(np, r.start,
 			MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN,
-			4, MPC85xx_OPENPIC_IRQ_OFFSET, 0, 250,
-			mpc85xx_cds_openpic_initsenses,
-			sizeof(mpc85xx_cds_openpic_initsenses), " OpenPIC  ");
-	BUG_ON(mpic1 == NULL);
-	mpic_assign_isu(mpic1, 0, OpenPIC_PAddr + 0x10200);
-	mpic_assign_isu(mpic1, 1, OpenPIC_PAddr + 0x10280);
-	mpic_assign_isu(mpic1, 2, OpenPIC_PAddr + 0x10300);
-	mpic_assign_isu(mpic1, 3, OpenPIC_PAddr + 0x10380);
-	mpic_assign_isu(mpic1, 4, OpenPIC_PAddr + 0x10400);
-	mpic_assign_isu(mpic1, 5, OpenPIC_PAddr + 0x10480);
-	mpic_assign_isu(mpic1, 6, OpenPIC_PAddr + 0x10500);
-	mpic_assign_isu(mpic1, 7, OpenPIC_PAddr + 0x10580);
-
-	/* dummy mappings to get to 48 */
-	mpic_assign_isu(mpic1, 8, OpenPIC_PAddr + 0x10600);
-	mpic_assign_isu(mpic1, 9, OpenPIC_PAddr + 0x10680);
-	mpic_assign_isu(mpic1, 10, OpenPIC_PAddr + 0x10700);
-	mpic_assign_isu(mpic1, 11, OpenPIC_PAddr + 0x10780);
-
-	/* External ints */
-	mpic_assign_isu(mpic1, 12, OpenPIC_PAddr + 0x10000);
-	mpic_assign_isu(mpic1, 13, OpenPIC_PAddr + 0x10080);
-	mpic_assign_isu(mpic1, 14, OpenPIC_PAddr + 0x10100);
-
-	mpic_init(mpic1);
+			4, 0, " OpenPIC  ");
+	BUG_ON(mpic == NULL);
+
+	/* Return the mpic node */
+	of_node_put(np);
+
+	mpic_assign_isu(mpic, 0, r.start + 0x10200);
+	mpic_assign_isu(mpic, 1, r.start + 0x10280);
+	mpic_assign_isu(mpic, 2, r.start + 0x10300);
+	mpic_assign_isu(mpic, 3, r.start + 0x10380);
+	mpic_assign_isu(mpic, 4, r.start + 0x10400);
+	mpic_assign_isu(mpic, 5, r.start + 0x10480);
+	mpic_assign_isu(mpic, 6, r.start + 0x10500);
+	mpic_assign_isu(mpic, 7, r.start + 0x10580);
+
+	/* Used only for 8548 so far, but no harm in
+	 * allocating them for everyone */
+	mpic_assign_isu(mpic, 8, r.start + 0x10600);
+	mpic_assign_isu(mpic, 9, r.start + 0x10680);
+	mpic_assign_isu(mpic, 10, r.start + 0x10700);
+	mpic_assign_isu(mpic, 11, r.start + 0x10780);
+
+	/* External Interrupts */
+	mpic_assign_isu(mpic, 12, r.start + 0x10000);
+	mpic_assign_isu(mpic, 13, r.start + 0x10080);
+	mpic_assign_isu(mpic, 14, r.start + 0x10100);
+
+	mpic_init(mpic);
+
+#ifdef CONFIG_PPC_I8259
+	/* Initialize the i8259 controller */
+	for_each_node_by_type(np, "interrupt-controller")
+		if (device_is_compatible(np, "chrp,iic")) {
+			cascade_node = np;
+			break;
+		}
+
+	if (cascade_node == NULL) {
+		printk(KERN_DEBUG "Could not find i8259 PIC\n");
+		return;
+	}
 
-#ifdef CONFIG_PCI
-	mpic_setup_cascade(PIRQ0A, i8259_irq_cascade, NULL);
+	cascade_irq = irq_of_parse_and_map(cascade_node, 0);
+	if (cascade_irq == NO_IRQ) {
+		printk(KERN_ERR "Failed to map cascade interrupt\n");
+		return;
+	}
 
-	i8259_init(0,0);
-#endif
+	i8259_init(cascade_node, 0);
+	of_node_put(cascade_node);
+
+	set_irq_chained_handler(cascade_irq, mpc85xx_8259_cascade);
+#endif /* CONFIG_PPC_I8259 */
 }
 
 
@@ -298,8 +266,6 @@ mpc85xx_cds_setup_arch(void)
 		add_bridge(np);
 
 	ppc_md.pcibios_fixup = mpc85xx_cds_pcibios_fixup;
-	ppc_md.pci_swizzle = common_swizzle;
-	ppc_md.pci_map_irq = mpc85xx_map_irq;
 	ppc_md.pci_exclude_device = mpc85xx_exclude_device;
 #endif
 
-- 
cgit v0.10.2


From 2654d6385f6cad00cfb8f5087aeb10d0ed781e74 Mon Sep 17 00:00:00 2001
From: Andy Fleming <afleming@freescale.com>
Date: Fri, 18 Aug 2006 18:04:34 -0500
Subject: [POWERPC] Add 85xx DTS files to powerpc

Added the mpc85xx family of dts files to the powerpc tree

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/boot/dts/mpc8540ads.dts b/arch/powerpc/boot/dts/mpc8540ads.dts
new file mode 100644
index 0000000..93d2c2d
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8540ads.dts
@@ -0,0 +1,257 @@
+/*
+ * MPC8540 ADS Device Tree Source
+ *
+ * Copyright 2006 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+
+/ {
+	model = "MPC8540ADS";
+	compatible = "MPC85xxADS";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	linux,phandle = <100>;
+
+	cpus {
+		#cpus = <1>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		linux,phandle = <200>;
+
+		PowerPC,8540@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <20>;	// 32 bytes
+			i-cache-line-size = <20>;	// 32 bytes
+			d-cache-size = <8000>;		// L1, 32K
+			i-cache-size = <8000>;		// L1, 32K
+			timebase-frequency = <0>;	//  33 MHz, from uboot
+			bus-frequency = <0>;	// 166 MHz
+			clock-frequency = <0>;	// 825 MHz, from uboot
+			32-bit;
+			linux,phandle = <201>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		linux,phandle = <300>;
+		reg = <00000000 08000000>;	// 128M at 0x0
+	};
+
+	soc8540@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		#interrupt-cells = <2>;
+		device_type = "soc";
+		ranges = <0 e0000000 00100000>;
+		reg = <e0000000 00100000>;	// CCSRBAR 1M
+		bus-frequency = <0>;
+
+		i2c@3000 {
+			device_type = "i2c";
+			compatible = "fsl-i2c";
+			reg = <3000 100>;
+			interrupts = <1b 2>;
+			interrupt-parent = <40000>;
+			dfsrr;
+		};
+
+		mdio@24520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "mdio";
+			compatible = "gianfar";
+			reg = <24520 20>;
+			linux,phandle = <24520>;
+			ethernet-phy@0 {
+				linux,phandle = <2452000>;
+				interrupt-parent = <40000>;
+				interrupts = <35 1>;
+				reg = <0>;
+				device_type = "ethernet-phy";
+			};
+			ethernet-phy@1 {
+				linux,phandle = <2452001>;
+				interrupt-parent = <40000>;
+				interrupts = <35 1>;
+				reg = <1>;
+				device_type = "ethernet-phy";
+			};
+			ethernet-phy@2 {
+				linux,phandle = <2452002>;
+				interrupt-parent = <40000>;
+				interrupts = <37 1>;
+				reg = <2>;
+				device_type = "ethernet-phy";
+			};
+		};
+
+		ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <24000 1000>;
+			address = [ 00 E0 0C 00 73 00 ];
+			local-mac-address = [ 00 E0 0C 00 73 00 ];
+			interrupts = <d 2 e 2 12 2>;
+			interrupt-parent = <40000>;
+			phy-handle = <2452000>;
+		};
+
+		ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <25000 1000>;
+			address = [ 00 E0 0C 00 73 01 ];
+			local-mac-address = [ 00 E0 0C 00 73 01 ];
+			interrupts = <13 2 14 2 18 2>;
+			interrupt-parent = <40000>;
+			phy-handle = <2452001>;
+		};
+
+		ethernet@26000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <26000 1000>;
+			address = [ 00 E0 0C 00 73 02 ];
+			local-mac-address = [ 00 E0 0C 00 73 02 ];
+			interrupts = <19 2>;
+			interrupt-parent = <40000>;
+			phy-handle = <2452002>;
+		};
+
+		serial@4500 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <4500 100>; 	// reg base, size
+			clock-frequency = <0>; 	// should we fill in in uboot?
+			interrupts = <1a 2>;
+			interrupt-parent = <40000>;
+		};
+
+		serial@4600 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <4600 100>;	// reg base, size
+			clock-frequency = <0>; 	// should we fill in in uboot?
+			interrupts = <1a 2>;
+			interrupt-parent = <40000>;
+		};
+		pci@8000 {
+			linux,phandle = <8000>;
+			interrupt-map-mask = <f800 0 0 7>;
+			interrupt-map = <
+
+				/* IDSEL 0x02 */
+				1000 0 0 1 40000 31 1
+				1000 0 0 2 40000 32 1
+				1000 0 0 3 40000 33 1
+				1000 0 0 4 40000 34 1
+
+				/* IDSEL 0x03 */
+				1800 0 0 1 40000 34 1
+				1800 0 0 2 40000 31 1
+				1800 0 0 3 40000 32 1
+				1800 0 0 4 40000 33 1
+
+				/* IDSEL 0x04 */
+				2000 0 0 1 40000 33 1
+				2000 0 0 2 40000 34 1
+				2000 0 0 3 40000 31 1
+				2000 0 0 4 40000 32 1
+
+				/* IDSEL 0x05 */
+				2800 0 0 1 40000 32 1
+				2800 0 0 2 40000 33 1
+				2800 0 0 3 40000 34 1
+				2800 0 0 4 40000 31 1
+
+				/* IDSEL 0x0c */
+				6000 0 0 1 40000 31 1
+				6000 0 0 2 40000 32 1
+				6000 0 0 3 40000 33 1
+				6000 0 0 4 40000 34 1
+
+				/* IDSEL 0x0d */
+				6800 0 0 1 40000 34 1
+				6800 0 0 2 40000 31 1
+				6800 0 0 3 40000 32 1
+				6800 0 0 4 40000 33 1
+
+				/* IDSEL 0x0e */
+				7000 0 0 1 40000 33 1
+				7000 0 0 2 40000 34 1
+				7000 0 0 3 40000 31 1
+				7000 0 0 4 40000 32 1
+
+				/* IDSEL 0x0f */
+				7800 0 0 1 40000 32 1
+				7800 0 0 2 40000 33 1
+				7800 0 0 3 40000 34 1
+				7800 0 0 4 40000 31 1
+
+				/* IDSEL 0x12 */
+				9000 0 0 1 40000 31 1
+				9000 0 0 2 40000 32 1
+				9000 0 0 3 40000 33 1
+				9000 0 0 4 40000 34 1
+
+				/* IDSEL 0x13 */
+				9800 0 0 1 40000 34 1
+				9800 0 0 2 40000 31 1
+				9800 0 0 3 40000 32 1
+				9800 0 0 4 40000 33 1
+
+				/* IDSEL 0x14 */
+				a000 0 0 1 40000 33 1
+				a000 0 0 2 40000 34 1
+				a000 0 0 3 40000 31 1
+				a000 0 0 4 40000 32 1
+
+				/* IDSEL 0x15 */
+				a800 0 0 1 40000 32 1
+				a800 0 0 2 40000 33 1
+				a800 0 0 3 40000 34 1
+				a800 0 0 4 40000 31 1>;
+			interrupt-parent = <40000>;
+			interrupts = <08 2>;
+			bus-range = <0 0>;
+			ranges = <02000000 0 80000000 80000000 0 20000000
+				  01000000 0 00000000 e2000000 0 00100000>;
+			clock-frequency = <3f940aa>;
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			reg = <8000 1000>;
+			compatible = "85xx";
+			device_type = "pci";
+		};
+
+		pic@40000 {
+			linux,phandle = <40000>;
+			clock-frequency = <0>;
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <40000 40000>;
+			built-in;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+                        big-endian;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc8541cds.dts b/arch/powerpc/boot/dts/mpc8541cds.dts
new file mode 100644
index 0000000..7be0bc6
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8541cds.dts
@@ -0,0 +1,244 @@
+/*
+ * MPC8541 CDS Device Tree Source
+ *
+ * Copyright 2006 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+
+/ {
+	model = "MPC8541CDS";
+	compatible = "MPC85xxCDS";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	linux,phandle = <100>;
+
+	cpus {
+		#cpus = <1>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		linux,phandle = <200>;
+
+		PowerPC,8541@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <20>;	// 32 bytes
+			i-cache-line-size = <20>;	// 32 bytes
+			d-cache-size = <8000>;		// L1, 32K
+			i-cache-size = <8000>;		// L1, 32K
+			timebase-frequency = <0>;	//  33 MHz, from uboot
+			bus-frequency = <0>;	// 166 MHz
+			clock-frequency = <0>;	// 825 MHz, from uboot
+			32-bit;
+			linux,phandle = <201>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		linux,phandle = <300>;
+		reg = <00000000 08000000>;	// 128M at 0x0
+	};
+
+	soc8541@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		#interrupt-cells = <2>;
+		device_type = "soc";
+		ranges = <0 e0000000 00100000>;
+		reg = <e0000000 00100000>;	// CCSRBAR 1M
+		bus-frequency = <0>;
+
+		i2c@3000 {
+			device_type = "i2c";
+			compatible = "fsl-i2c";
+			reg = <3000 100>;
+			interrupts = <1b 2>;
+			interrupt-parent = <40000>;
+			dfsrr;
+		};
+
+		mdio@24520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "mdio";
+			compatible = "gianfar";
+			reg = <24520 20>;
+			linux,phandle = <24520>;
+			ethernet-phy@0 {
+				linux,phandle = <2452000>;
+				interrupt-parent = <40000>;
+				interrupts = <35 0>;
+				reg = <0>;
+				device_type = "ethernet-phy";
+			};
+			ethernet-phy@1 {
+				linux,phandle = <2452001>;
+				interrupt-parent = <40000>;
+				interrupts = <35 0>;
+				reg = <1>;
+				device_type = "ethernet-phy";
+			};
+		};
+
+		ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <24000 1000>;
+			local-mac-address = [ 00 E0 0C 00 73 00 ];
+			interrupts = <d 2 e 2 12 2>;
+			interrupt-parent = <40000>;
+			phy-handle = <2452000>;
+		};
+
+		ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <25000 1000>;
+			local-mac-address = [ 00 E0 0C 00 73 01 ];
+			interrupts = <13 2 14 2 18 2>;
+			interrupt-parent = <40000>;
+			phy-handle = <2452001>;
+		};
+
+		serial@4500 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <4500 100>; 	// reg base, size
+			clock-frequency = <0>; 	// should we fill in in uboot?
+			interrupts = <1a 2>;
+			interrupt-parent = <40000>;
+		};
+
+		serial@4600 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <4600 100>;	// reg base, size
+			clock-frequency = <0>; 	// should we fill in in uboot?
+			interrupts = <1a 2>;
+			interrupt-parent = <40000>;
+		};
+
+		pci@8000 {
+			linux,phandle = <8000>;
+			interrupt-map-mask = <1f800 0 0 7>;
+			interrupt-map = <
+
+				/* IDSEL 0x10 */
+				08000 0 0 1 40000 30 1
+				08000 0 0 2 40000 31 1
+				08000 0 0 3 40000 32 1
+				08000 0 0 4 40000 33 1
+
+				/* IDSEL 0x11 */
+				08800 0 0 1 40000 30 1
+				08800 0 0 2 40000 31 1
+				08800 0 0 3 40000 32 1
+				08800 0 0 4 40000 33 1
+
+				/* IDSEL 0x12 (Slot 1) */
+				09000 0 0 1 40000 30 1
+				09000 0 0 2 40000 31 1
+				09000 0 0 3 40000 32 1
+				09000 0 0 4 40000 33 1
+
+				/* IDSEL 0x13 (Slot 2) */
+				09800 0 0 1 40000 31 1
+				09800 0 0 2 40000 32 1
+				09800 0 0 3 40000 33 1
+				09800 0 0 4 40000 30 1
+
+				/* IDSEL 0x14 (Slot 3) */
+				0a000 0 0 1 40000 32 1
+				0a000 0 0 2 40000 33 1
+				0a000 0 0 3 40000 30 1
+				0a000 0 0 4 40000 31 1
+
+				/* IDSEL 0x15 (Slot 4) */
+				0a800 0 0 1 40000 33 1
+				0a800 0 0 2 40000 30 1
+				0a800 0 0 3 40000 31 1
+				0a800 0 0 4 40000 32 1
+
+				/* Bus 1 (Tundra Bridge) */
+				/* IDSEL 0x12 (ISA bridge) */
+				19000 0 0 1 40000 30 1
+				19000 0 0 2 40000 31 1
+				19000 0 0 3 40000 32 1
+				19000 0 0 4 40000 33 1>;
+			interrupt-parent = <40000>;
+			interrupts = <08 2>;
+			bus-range = <0 0>;
+			ranges = <02000000 0 80000000 80000000 0 20000000
+				  01000000 0 00000000 e2000000 0 00100000>;
+			clock-frequency = <3f940aa>;
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			reg = <8000 1000>;
+			compatible = "85xx";
+			device_type = "pci";
+
+			i8259@19000 {
+				clock-frequency = <0>;
+				interrupt-controller;
+				device_type = "interrupt-controller";
+				reg = <19000 0 0 0 1>;
+				#address-cells = <0>;
+				#interrupt-cells = <2>;
+				built-in;
+				compatible = "chrp,iic";
+				big-endian;
+				interrupts = <1>;
+				interrupt-parent = <8000>;
+			};
+		};
+
+		pci@9000 {
+			linux,phandle = <9000>;
+			interrupt-map-mask = <f800 0 0 7>;
+			interrupt-map = <
+
+				/* IDSEL 0x15 */
+				a800 0 0 1 40000 3b 1
+				a800 0 0 2 40000 3b 1
+				a800 0 0 3 40000 3b 1
+				a800 0 0 4 40000 3b 1>;
+			interrupt-parent = <40000>;
+			interrupts = <09 2>;
+			bus-range = <0 0>;
+			ranges = <02000000 0 a0000000 a0000000 0 20000000
+				  01000000 0 00000000 e3000000 0 00100000>;
+			clock-frequency = <3f940aa>;
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			reg = <9000 1000>;
+			compatible = "85xx";
+			device_type = "pci";
+		};
+
+		pic@40000 {
+			linux,phandle = <40000>;
+			clock-frequency = <0>;
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <40000 40000>;
+			built-in;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+                        big-endian;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc8548cds.dts b/arch/powerpc/boot/dts/mpc8548cds.dts
new file mode 100644
index 0000000..893d795
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8548cds.dts
@@ -0,0 +1,287 @@
+/*
+ * MPC8548 CDS Device Tree Source
+ *
+ * Copyright 2006 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+
+/ {
+	model = "MPC8548CDS";
+	compatible = "MPC85xxCDS";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	linux,phandle = <100>;
+
+	cpus {
+		#cpus = <1>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		linux,phandle = <200>;
+
+		PowerPC,8548@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <20>;	// 32 bytes
+			i-cache-line-size = <20>;	// 32 bytes
+			d-cache-size = <8000>;		// L1, 32K
+			i-cache-size = <8000>;		// L1, 32K
+			timebase-frequency = <0>;	//  33 MHz, from uboot
+			bus-frequency = <0>;	// 166 MHz
+			clock-frequency = <0>;	// 825 MHz, from uboot
+			32-bit;
+			linux,phandle = <201>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		linux,phandle = <300>;
+		reg = <00000000 08000000>;	// 128M at 0x0
+	};
+
+	soc8548@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		#interrupt-cells = <2>;
+		device_type = "soc";
+		ranges = <0 e0000000 00100000>;
+		reg = <e0000000 00100000>;	// CCSRBAR 1M
+		bus-frequency = <0>;
+
+		i2c@3000 {
+			device_type = "i2c";
+			compatible = "fsl-i2c";
+			reg = <3000 100>;
+			interrupts = <1b 2>;
+			interrupt-parent = <40000>;
+			dfsrr;
+		};
+
+		mdio@24520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "mdio";
+			compatible = "gianfar";
+			reg = <24520 20>;
+			linux,phandle = <24520>;
+			ethernet-phy@0 {
+				linux,phandle = <2452000>;
+				interrupt-parent = <40000>;
+				interrupts = <35 0>;
+				reg = <0>;
+				device_type = "ethernet-phy";
+			};
+			ethernet-phy@1 {
+				linux,phandle = <2452001>;
+				interrupt-parent = <40000>;
+				interrupts = <35 0>;
+				reg = <1>;
+				device_type = "ethernet-phy";
+			};
+
+			ethernet-phy@2 {
+				linux,phandle = <2452002>;
+				interrupt-parent = <40000>;
+				interrupts = <35 0>;
+				reg = <2>;
+				device_type = "ethernet-phy";
+			};
+			ethernet-phy@3 {
+				linux,phandle = <2452003>;
+				interrupt-parent = <40000>;
+				interrupts = <35 0>;
+				reg = <3>;
+				device_type = "ethernet-phy";
+			};
+		};
+
+		ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <24000 1000>;
+			local-mac-address = [ 00 E0 0C 00 73 00 ];
+			interrupts = <d 2 e 2 12 2>;
+			interrupt-parent = <40000>;
+			phy-handle = <2452000>;
+		};
+
+		ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <25000 1000>;
+			local-mac-address = [ 00 E0 0C 00 73 01 ];
+			interrupts = <13 2 14 2 18 2>;
+			interrupt-parent = <40000>;
+			phy-handle = <2452001>;
+		};
+
+		ethernet@26000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <26000 1000>;
+			local-mac-address = [ 00 E0 0C 00 73 02 ];
+			interrupts = <f 2 10 2 11 2>;
+			interrupt-parent = <40000>;
+			phy-handle = <2452001>;
+		};
+
+/* eTSEC 4 is currently broken
+		ethernet@27000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <27000 1000>;
+			local-mac-address = [ 00 E0 0C 00 73 03 ];
+			interrupts = <15 2 16 2 17 2>;
+			interrupt-parent = <40000>;
+			phy-handle = <2452001>;
+		};
+ */
+
+		serial@4500 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <4500 100>; 	// reg base, size
+			clock-frequency = <0>; 	// should we fill in in uboot?
+			interrupts = <1a 2>;
+			interrupt-parent = <40000>;
+		};
+
+		serial@4600 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <4600 100>;	// reg base, size
+			clock-frequency = <0>; 	// should we fill in in uboot?
+			interrupts = <1a 2>;
+			interrupt-parent = <40000>;
+		};
+
+		pci@8000 {
+			linux,phandle = <8000>;
+			interrupt-map-mask = <1f800 0 0 7>;
+			interrupt-map = <
+
+				/* IDSEL 0x10 */
+				08000 0 0 1 40000 30 1
+				08000 0 0 2 40000 31 1
+				08000 0 0 3 40000 32 1
+				08000 0 0 4 40000 33 1
+
+				/* IDSEL 0x11 */
+				08800 0 0 1 40000 30 1
+				08800 0 0 2 40000 31 1
+				08800 0 0 3 40000 32 1
+				08800 0 0 4 40000 33 1
+
+				/* IDSEL 0x12 (Slot 1) */
+				09000 0 0 1 40000 30 1
+				09000 0 0 2 40000 31 1
+				09000 0 0 3 40000 32 1
+				09000 0 0 4 40000 33 1
+
+				/* IDSEL 0x13 (Slot 2) */
+				09800 0 0 1 40000 31 1
+				09800 0 0 2 40000 32 1
+				09800 0 0 3 40000 33 1
+				09800 0 0 4 40000 30 1
+
+				/* IDSEL 0x14 (Slot 3) */
+				0a000 0 0 1 40000 32 1
+				0a000 0 0 2 40000 33 1
+				0a000 0 0 3 40000 30 1
+				0a000 0 0 4 40000 31 1
+
+				/* IDSEL 0x15 (Slot 4) */
+				0a800 0 0 1 40000 33 1
+				0a800 0 0 2 40000 30 1
+				0a800 0 0 3 40000 31 1
+				0a800 0 0 4 40000 32 1
+
+				/* Bus 1 (Tundra Bridge) */
+				/* IDSEL 0x12 (ISA bridge) */
+				19000 0 0 1 40000 30 1
+				19000 0 0 2 40000 31 1
+				19000 0 0 3 40000 32 1
+				19000 0 0 4 40000 33 1>;
+			interrupt-parent = <40000>;
+			interrupts = <08 2>;
+			bus-range = <0 0>;
+			ranges = <02000000 0 80000000 80000000 0 20000000
+				  01000000 0 00000000 e2000000 0 00100000>;
+			clock-frequency = <3f940aa>;
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			reg = <8000 1000>;
+			compatible = "85xx";
+			device_type = "pci";
+
+			i8259@19000 {
+				clock-frequency = <0>;
+				interrupt-controller;
+				device_type = "interrupt-controller";
+				reg = <19000 0 0 0 1>;
+				#address-cells = <0>;
+				#interrupt-cells = <2>;
+				built-in;
+				compatible = "chrp,iic";
+				big-endian;
+				interrupts = <1>;
+				interrupt-parent = <8000>;
+			};
+		};
+
+		pci@9000 {
+			linux,phandle = <9000>;
+			interrupt-map-mask = <f800 0 0 7>;
+			interrupt-map = <
+
+				/* IDSEL 0x15 */
+				a800 0 0 1 40000 3b 1
+				a800 0 0 2 40000 3b 1
+				a800 0 0 3 40000 3b 1
+				a800 0 0 4 40000 3b 1>;
+			interrupt-parent = <40000>;
+			interrupts = <09 2>;
+			bus-range = <0 0>;
+			ranges = <02000000 0 a0000000 a0000000 0 20000000
+				  01000000 0 00000000 e3000000 0 00100000>;
+			clock-frequency = <3f940aa>;
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			reg = <9000 1000>;
+			compatible = "85xx";
+			device_type = "pci";
+		};
+
+		pic@40000 {
+			linux,phandle = <40000>;
+			clock-frequency = <0>;
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <40000 40000>;
+			built-in;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+                        big-endian;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc8555cds.dts b/arch/powerpc/boot/dts/mpc8555cds.dts
new file mode 100644
index 0000000..118f5a8
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8555cds.dts
@@ -0,0 +1,244 @@
+/*
+ * MPC8555 CDS Device Tree Source
+ *
+ * Copyright 2006 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+
+/ {
+	model = "MPC8555CDS";
+	compatible = "MPC85xxCDS";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	linux,phandle = <100>;
+
+	cpus {
+		#cpus = <1>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		linux,phandle = <200>;
+
+		PowerPC,8555@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <20>;	// 32 bytes
+			i-cache-line-size = <20>;	// 32 bytes
+			d-cache-size = <8000>;		// L1, 32K
+			i-cache-size = <8000>;		// L1, 32K
+			timebase-frequency = <0>;	//  33 MHz, from uboot
+			bus-frequency = <0>;	// 166 MHz
+			clock-frequency = <0>;	// 825 MHz, from uboot
+			32-bit;
+			linux,phandle = <201>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		linux,phandle = <300>;
+		reg = <00000000 08000000>;	// 128M at 0x0
+	};
+
+	soc8555@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		#interrupt-cells = <2>;
+		device_type = "soc";
+		ranges = <0 e0000000 00100000>;
+		reg = <e0000000 00100000>;	// CCSRBAR 1M
+		bus-frequency = <0>;
+
+		i2c@3000 {
+			device_type = "i2c";
+			compatible = "fsl-i2c";
+			reg = <3000 100>;
+			interrupts = <1b 2>;
+			interrupt-parent = <40000>;
+			dfsrr;
+		};
+
+		mdio@24520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "mdio";
+			compatible = "gianfar";
+			reg = <24520 20>;
+			linux,phandle = <24520>;
+			ethernet-phy@0 {
+				linux,phandle = <2452000>;
+				interrupt-parent = <40000>;
+				interrupts = <35 0>;
+				reg = <0>;
+				device_type = "ethernet-phy";
+			};
+			ethernet-phy@1 {
+				linux,phandle = <2452001>;
+				interrupt-parent = <40000>;
+				interrupts = <35 0>;
+				reg = <1>;
+				device_type = "ethernet-phy";
+			};
+		};
+
+		ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <24000 1000>;
+			local-mac-address = [ 00 E0 0C 00 73 00 ];
+			interrupts = <0d 2 0e 2 12 2>;
+			interrupt-parent = <40000>;
+			phy-handle = <2452000>;
+		};
+
+		ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <25000 1000>;
+			local-mac-address = [ 00 E0 0C 00 73 01 ];
+			interrupts = <13 2 14 2 18 2>;
+			interrupt-parent = <40000>;
+			phy-handle = <2452001>;
+		};
+
+		serial@4500 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <4500 100>; 	// reg base, size
+			clock-frequency = <0>; 	// should we fill in in uboot?
+			interrupts = <1a 2>;
+			interrupt-parent = <40000>;
+		};
+
+		serial@4600 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <4600 100>;	// reg base, size
+			clock-frequency = <0>; 	// should we fill in in uboot?
+			interrupts = <1a 2>;
+			interrupt-parent = <40000>;
+		};
+
+		pci@8000 {
+			linux,phandle = <8000>;
+			interrupt-map-mask = <1f800 0 0 7>;
+			interrupt-map = <
+
+				/* IDSEL 0x10 */
+				08000 0 0 1 40000 30 1
+				08000 0 0 2 40000 31 1
+				08000 0 0 3 40000 32 1
+				08000 0 0 4 40000 33 1
+
+				/* IDSEL 0x11 */
+				08800 0 0 1 40000 30 1
+				08800 0 0 2 40000 31 1
+				08800 0 0 3 40000 32 1
+				08800 0 0 4 40000 33 1
+
+				/* IDSEL 0x12 (Slot 1) */
+				09000 0 0 1 40000 30 1
+				09000 0 0 2 40000 31 1
+				09000 0 0 3 40000 32 1
+				09000 0 0 4 40000 33 1
+
+				/* IDSEL 0x13 (Slot 2) */
+				09800 0 0 1 40000 31 1
+				09800 0 0 2 40000 32 1
+				09800 0 0 3 40000 33 1
+				09800 0 0 4 40000 30 1
+
+				/* IDSEL 0x14 (Slot 3) */
+				0a000 0 0 1 40000 32 1
+				0a000 0 0 2 40000 33 1
+				0a000 0 0 3 40000 30 1
+				0a000 0 0 4 40000 31 1
+
+				/* IDSEL 0x15 (Slot 4) */
+				0a800 0 0 1 40000 33 1
+				0a800 0 0 2 40000 30 1
+				0a800 0 0 3 40000 31 1
+				0a800 0 0 4 40000 32 1
+
+				/* Bus 1 (Tundra Bridge) */
+				/* IDSEL 0x12 (ISA bridge) */
+				19000 0 0 1 40000 30 1
+				19000 0 0 2 40000 31 1
+				19000 0 0 3 40000 32 1
+				19000 0 0 4 40000 33 1>;
+			interrupt-parent = <40000>;
+			interrupts = <08 2>;
+			bus-range = <0 0>;
+			ranges = <02000000 0 80000000 80000000 0 20000000
+				  01000000 0 00000000 e2000000 0 00100000>;
+			clock-frequency = <3f940aa>;
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			reg = <8000 1000>;
+			compatible = "85xx";
+			device_type = "pci";
+
+			i8259@19000 {
+				clock-frequency = <0>;
+				interrupt-controller;
+				device_type = "interrupt-controller";
+				reg = <19000 0 0 0 1>;
+				#address-cells = <0>;
+				#interrupt-cells = <2>;
+				built-in;
+				compatible = "chrp,iic";
+				big-endian;
+				interrupts = <1>;
+				interrupt-parent = <8000>;
+			};
+		};
+
+		pci@9000 {
+			linux,phandle = <9000>;
+			interrupt-map-mask = <f800 0 0 7>;
+			interrupt-map = <
+
+				/* IDSEL 0x15 */
+				a800 0 0 1 40000 3b 1
+				a800 0 0 2 40000 3b 1
+				a800 0 0 3 40000 3b 1
+				a800 0 0 4 40000 3b 1>;
+			interrupt-parent = <40000>;
+			interrupts = <09 2>;
+			bus-range = <0 0>;
+			ranges = <02000000 0 a0000000 a0000000 0 20000000
+				  01000000 0 00000000 e3000000 0 00100000>;
+			clock-frequency = <3f940aa>;
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			reg = <9000 1000>;
+			compatible = "85xx";
+			device_type = "pci";
+		};
+
+		pic@40000 {
+			linux,phandle = <40000>;
+			clock-frequency = <0>;
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <40000 40000>;
+			built-in;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+                        big-endian;
+		};
+	};
+};
-- 
cgit v0.10.2


From 343832734fac000d2d276ccc41955daded1265f5 Mon Sep 17 00:00:00 2001
From: Jon Loeliger <jdl@freescale.com>
Date: Fri, 18 Aug 2006 14:30:35 -0500
Subject: [POWERPC] Rewrite the PPC 86xx IRQ handling to use Flat Device Tree

IRQ setup now comes from the Flat Device Tree and uses the new generic
IRQ code.  Fixed the fsl_soc.c IRQ OF interrupt node parsing.
Removed some unused MPC86xx macro definitions.

Signed-off-by: Zhang Wei <wei.zhang@freescale.com>
Signed-off-by: Jon Loeliger <jdl@freescale.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
(cherry picked from 919fede6edab94cccb3ca8c1c0b32fa62c9369a5 commit)

diff --git a/arch/powerpc/platforms/86xx/mpc8641_hpcn.h b/arch/powerpc/platforms/86xx/mpc8641_hpcn.h
index 5d2bcf7..41e554c 100644
--- a/arch/powerpc/platforms/86xx/mpc8641_hpcn.h
+++ b/arch/powerpc/platforms/86xx/mpc8641_hpcn.h
@@ -16,38 +16,6 @@
 
 #include <linux/init.h>
 
-/* PCI interrupt controller */
-#define PIRQA		3
-#define PIRQB		4
-#define PIRQC		5
-#define PIRQD		6
-#define PIRQ7		7
-#define PIRQE		9
-#define PIRQF		10
-#define PIRQG		11
-#define PIRQH		12
-
-/* PCI-Express memory map */
-#define MPC86XX_PCIE_LOWER_IO        0x00000000
-#define MPC86XX_PCIE_UPPER_IO        0x00ffffff
-
-#define MPC86XX_PCIE_LOWER_MEM       0x80000000
-#define MPC86XX_PCIE_UPPER_MEM       0x9fffffff
-
-#define MPC86XX_PCIE_IO_BASE         0xe2000000
-#define MPC86XX_PCIE_MEM_OFFSET      0x00000000
-
-#define MPC86XX_PCIE_IO_SIZE         0x01000000
-
-#define PCIE1_CFG_ADDR_OFFSET    (0x8000)
-#define PCIE1_CFG_DATA_OFFSET    (0x8004)
-
-#define PCIE2_CFG_ADDR_OFFSET    (0x9000)
-#define PCIE2_CFG_DATA_OFFSET    (0x9004)
-
-#define MPC86xx_PCIE_OFFSET PCIE1_CFG_ADDR_OFFSET
-#define MPC86xx_PCIE_SIZE	(0x1000)
-
 #define MPC86XX_RSTCR_OFFSET	(0xe00b0)	/* Reset Control Register */
 
 #endif	/* __MPC8641_HPCN_H__ */
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index ebae73e..146da30 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -37,6 +37,14 @@
 #include "mpc86xx.h"
 #include "mpc8641_hpcn.h"
 
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) do { printk(KERN_ERR fmt); } while(0)
+#else
+#define DBG(fmt...) do { } while(0)
+#endif
+
 #ifndef CONFIG_PCI
 unsigned long isa_io_base = 0;
 unsigned long isa_mem_base = 0;
@@ -44,205 +52,215 @@ unsigned long pci_dram_offset = 0;
 #endif
 
 
-/*
- * Internal interrupts are all Level Sensitive, and Positive Polarity
- */
-
-static u_char mpc86xx_hpcn_openpic_initsenses[] __initdata = {
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  0: Reserved */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  1: MCM */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  2: DDR DRAM */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  3: LBIU */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  4: DMA 0 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  5: DMA 1 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  6: DMA 2 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  7: DMA 3 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  8: PCIE1 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal  9: PCIE2 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 10: Reserved */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 11: Reserved */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 12: DUART2 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 13: TSEC 1 Transmit */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 14: TSEC 1 Receive */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 15: TSEC 3 transmit */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 16: TSEC 3 receive */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 17: TSEC 3 error */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 18: TSEC 1 Receive/Transmit Error */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 19: TSEC 2 Transmit */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 20: TSEC 2 Receive */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 21: TSEC 4 transmit */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 22: TSEC 4 receive */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 23: TSEC 4 error */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 24: TSEC 2 Receive/Transmit Error */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 25: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 26: DUART1 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 27: I2C */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 28: Performance Monitor */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 29: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 30: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 31: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 32: SRIO error/write-port unit */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 33: SRIO outbound doorbell */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 34: SRIO inbound doorbell */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 35: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 36: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 37: SRIO outbound message unit 1 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 38: SRIO inbound message unit 1 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 39: SRIO outbound message unit 2 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 40: SRIO inbound message unit 2 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 41: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 42: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 43: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 44: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 45: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 46: Unused */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* Internal 47: Unused */
-	0x0,						/* External  0: */
-	0x0,						/* External  1: */
-	0x0,						/* External  2: */
-	0x0,						/* External  3: */
-	0x0,						/* External  4: */
-	0x0,						/* External  5: */
-	0x0,						/* External  6: */
-	0x0,						/* External  7: */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* External  8: Pixis FPGA */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* External  9: ULI 8259 INTR Cascade */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* External 10: Quad ETH PHY */
-	0x0,						/* External 11: */
-	0x0,
-	0x0,
-	0x0,
-	0x0,
-};
-
+static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc,
+				 struct pt_regs *regs)
+{
+	unsigned int cascade_irq = i8259_irq(regs);
+	if (cascade_irq != NO_IRQ)
+		generic_handle_irq(cascade_irq, regs);
+	desc->chip->eoi(irq);
+}
 
 void __init
 mpc86xx_hpcn_init_irq(void)
 {
 	struct mpic *mpic1;
+	struct device_node *np, *cascade_node = NULL;
+	int cascade_irq;
 	phys_addr_t openpic_paddr;
 
+	np = of_find_node_by_type(NULL, "open-pic");
+	if (np == NULL)
+		return;
+
 	/* Determine the Physical Address of the OpenPIC regs */
 	openpic_paddr = get_immrbase() + MPC86xx_OPENPIC_OFFSET;
 
 	/* Alloc mpic structure and per isu has 16 INT entries. */
-	mpic1 = mpic_alloc(openpic_paddr,
+	mpic1 = mpic_alloc(np, openpic_paddr,
 			MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN,
-			16, MPC86xx_OPENPIC_IRQ_OFFSET, 0, 250,
-			mpc86xx_hpcn_openpic_initsenses,
-			sizeof(mpc86xx_hpcn_openpic_initsenses),
+			16, NR_IRQS - 4,
 			" MPIC     ");
 	BUG_ON(mpic1 == NULL);
 
+	mpic_assign_isu(mpic1, 0, openpic_paddr + 0x10000);
+
 	/* 48 Internal Interrupts */
-	mpic_assign_isu(mpic1, 0, openpic_paddr + 0x10200);
-	mpic_assign_isu(mpic1, 1, openpic_paddr + 0x10400);
-	mpic_assign_isu(mpic1, 2, openpic_paddr + 0x10600);
+	mpic_assign_isu(mpic1, 1, openpic_paddr + 0x10200);
+	mpic_assign_isu(mpic1, 2, openpic_paddr + 0x10400);
+	mpic_assign_isu(mpic1, 3, openpic_paddr + 0x10600);
 
-	/* 16 External interrupts */
-	mpic_assign_isu(mpic1, 3, openpic_paddr + 0x10000);
+	/* 16 External interrupts
+	 * Moving them from [0 - 15] to [64 - 79]
+	 */
+	mpic_assign_isu(mpic1, 4, openpic_paddr + 0x10000);
 
 	mpic_init(mpic1);
 
 #ifdef CONFIG_PCI
-	mpic_setup_cascade(MPC86xx_IRQ_EXT9, i8259_irq_cascade, NULL);
-	i8259_init(0, I8259_OFFSET);
-#endif
-}
+	/* Initialize i8259 controller */
+	for_each_node_by_type(np, "interrupt-controller")
+		if (device_is_compatible(np, "chrp,iic")) {
+			cascade_node = np;
+			break;
+		}
+	if (cascade_node == NULL) {
+		printk(KERN_DEBUG "mpc86xxhpcn: no ISA interrupt controller\n");
+		return;
+	}
 
+	cascade_irq = irq_of_parse_and_map(cascade_node, 0);
+	if (cascade_irq == NO_IRQ) {	/* cascade node has no usable interrupt */
+		printk(KERN_ERR "mpc86xxhpcn: failed to map cascade interrupt\n");
+		return;
+	}
+	DBG("mpc86xxhpcn: cascade mapped to irq %d\n", cascade_irq);
 
+	i8259_init(cascade_node, 0);
+	set_irq_chained_handler(cascade_irq, mpc86xx_8259_cascade);
+#endif
+}
 
 #ifdef CONFIG_PCI
-/*
- * interrupt routing
- */
 
-int
-mpc86xx_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin)
+enum pirq{PIRQA = 8, PIRQB, PIRQC, PIRQD, PIRQE, PIRQF, PIRQG, PIRQH};
+const unsigned char uli1575_irq_route_table[16] = {
+	0, 	/* 0: Reserved */
+	0x8, 	/* 1: 0b1000 */
+	0, 	/* 2: Reserved */
+	0x2,	/* 3: 0b0010 */
+	0x4,	/* 4: 0b0100 */
+	0x5, 	/* 5: 0b0101 */
+	0x7,	/* 6: 0b0111 */
+	0x6,	/* 7: 0b0110 */
+	0, 	/* 8: Reserved */
+	0x1,	/* 9: 0b0001 */
+	0x3,	/* 10: 0b0011 */
+	0x9,	/* 11: 0b1001 */
+	0xb,	/* 12: 0b1011 */
+	0, 	/* 13: Reserved */
+	0xd,	/* 14: 0b1101 */
+	0xf,	/* 15: 0b1111 */
+};
+
+static int __devinit
+get_pci_irq_from_of(struct pci_controller *hose, int slot, int pin)
 {
-	static char pci_irq_table[][4] = {
-		/*
-		 *      PCI IDSEL/INTPIN->INTLINE
-		 *       A      B      C      D
-		 */
-		{PIRQA, PIRQB, PIRQC, PIRQD},   /* IDSEL 17 -- PCI Slot 1 */
-		{PIRQB, PIRQC, PIRQD, PIRQA},	/* IDSEL 18 -- PCI Slot 2 */
-		{0, 0, 0, 0},			/* IDSEL 19 */
-		{0, 0, 0, 0},			/* IDSEL 20 */
-		{0, 0, 0, 0},			/* IDSEL 21 */
-		{0, 0, 0, 0},			/* IDSEL 22 */
-		{0, 0, 0, 0},			/* IDSEL 23 */
-		{0, 0, 0, 0},			/* IDSEL 24 */
-		{0, 0, 0, 0},			/* IDSEL 25 */
-		{PIRQD, PIRQA, PIRQB, PIRQC},	/* IDSEL 26 -- PCI Bridge*/
-		{PIRQC, 0, 0, 0},		/* IDSEL 27 -- LAN */
-		{PIRQE, PIRQF, PIRQH, PIRQ7},	/* IDSEL 28 -- USB 1.1 */
-		{PIRQE, PIRQF, PIRQG, 0},	/* IDSEL 29 -- Audio & Modem */
-		{PIRQH, 0, 0, 0},		/* IDSEL 30 -- LPC & PMU*/
-		{PIRQD, 0, 0, 0},		/* IDSEL 31 -- ATA */
-	};
-
-	const long min_idsel = 17, max_idsel = 31, irqs_per_slot = 4;
-	return PCI_IRQ_TABLE_LOOKUP + I8259_OFFSET;
+	struct of_irq oirq;
+	u32 laddr[3];
+	struct device_node *hosenode = hose ? hose->arch_data : NULL;
+
+	if (!hosenode) return -EINVAL;
+
+	laddr[0] = (hose->first_busno << 16) | (PCI_DEVFN(slot, 0) << 8);
+	laddr[1] = laddr[2] = 0;
+	if (of_irq_map_raw(hosenode, &pin, laddr, &oirq)) return -EINVAL;
+	DBG("mpc86xx_hpcn: pci irq addr %x, slot %d, pin %d, irq %d\n",
+			laddr[0], slot, pin, oirq.specifier[0]);
+	return oirq.specifier[0];	/* first cell of the mapped specifier */
 }
 
-static void __devinit quirk_ali1575(struct pci_dev *dev)
+static void __devinit quirk_uli1575(struct pci_dev *dev)
 {
 	unsigned short temp;
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	unsigned char irq2pin[16];
+	unsigned long pirq_map_word = 0;
+	u32 irq;
+	int i;
 
 	/*
-	 * ALI1575 interrupts route table setup:
+	 * ULI1575 interrupts route setup
+	 */
+	memset(irq2pin, 0, 16); /* Initialize default value 0 */
+
+	/*
+	 * PIRQA -> PIRQD mapping read from OF-tree
+	 *
+	 * interrupts for PCI slot0 -- PIRQA / PIRQB / PIRQC / PIRQD
+	 *                PCI slot1 -- PIRQB / PIRQC / PIRQD / PIRQA
+	 */
+	for (i = 0; i < 4; i++){
+		irq = get_pci_irq_from_of(hose, 17, i + 1);
+		if (irq > 0 && irq < 16)
+			irq2pin[irq] = PIRQA + i;
+		else
+			printk(KERN_WARNING "ULI1575 device"
+			    "(slot %d, pin %d) irq %d is invalid.\n",
+			    17, i + 1, irq);	/* report the pin queried */
+	}
+
+	/*
+	 * PIRQE -> PIRQH mapping set manually
 	 *
 	 * IRQ pin   IRQ#
-	 * PIRQA ---- 3
-	 * PIRQB ---- 4
-	 * PIRQC ---- 5
-	 * PIRQD ---- 6
 	 * PIRQE ---- 9
 	 * PIRQF ---- 10
 	 * PIRQG ---- 11
 	 * PIRQH ---- 12
-	 *
-	 * interrupts for PCI slot0 -- PIRQA / PIRQB / PIRQC / PIRQD
-	 *                PCI slot1 -- PIRQB / PIRQC / PIRQD / PIRQA
 	 */
-	pci_write_config_dword(dev, 0x48, 0xb9317542);
+	for (i = 0; i < 4; i++) irq2pin[i + 9] = PIRQE + i;
+
+	/* Set IRQ-PIRQ Mapping to ULI1575 */
+	for (i = 0; i < 16; i++)
+		if (irq2pin[i])
+			pirq_map_word |= (uli1575_irq_route_table[i] & 0xf)
+				<< ((irq2pin[i] - PIRQA) * 4);
 
-	/* USB 1.1 OHCI controller 1, interrupt: PIRQE */
-	pci_write_config_byte(dev, 0x86, 0x0c);
+	/* ULI1575 IRQ mapping conf register default value is 0xb9317542 */
+	DBG("Setup ULI1575 IRQ mapping configuration register value = 0x%x\n",
+			pirq_map_word);
+	pci_write_config_dword(dev, 0x48, pirq_map_word);
 
-	/* USB 1.1 OHCI controller 2, interrupt: PIRQF */
-	pci_write_config_byte(dev, 0x87, 0x0d);
+#define ULI1575_SET_DEV_IRQ(slot, pin, reg) 				\
+	do { 								\
+		int irq; 						\
+		irq = get_pci_irq_from_of(hose, slot, pin); 		\
+		if (irq > 0 && irq < 16) 				\
+			pci_write_config_byte(dev, reg, irq2pin[irq]); 	\
+		else							\
+			printk(KERN_WARNING "ULI1575 device"		\
+			    "(slot %d, pin %d) irq %d is invalid.\n",	\
+			    slot, pin, irq);				\
+	} while(0)
 
-	/* USB 1.1 OHCI controller 3, interrupt: PIRQH */
-	pci_write_config_byte(dev, 0x88, 0x0f);
+	/* USB 1.1 OHCI controller 1, slot 28, pin 1 */
+	ULI1575_SET_DEV_IRQ(28, 1, 0x86);
 
-	/* USB 2.0 controller, interrupt: PIRQ7 */
-	pci_write_config_byte(dev, 0x74, 0x06);
+	/* USB 1.1 OHCI controller 2, slot 28, pin 2 */
+	ULI1575_SET_DEV_IRQ(28, 2, 0x87);
 
-	/* Audio controller, interrupt: PIRQE */
-	pci_write_config_byte(dev, 0x8a, 0x0c);
+	/* USB 1.1 OHCI controller 3, slot 28, pin 3 */
+	ULI1575_SET_DEV_IRQ(28, 3, 0x88);
 
-	/* Modem controller, interrupt: PIRQF */
-	pci_write_config_byte(dev, 0x8b, 0x0d);
+	/* USB 2.0 controller, slot 28, pin 4 */
+	irq = get_pci_irq_from_of(hose, 28, 4);
+	if (irq >= 0 && irq <=15)
+		pci_write_config_dword(dev, 0x74, uli1575_irq_route_table[irq]);
 
-	/* HD audio controller, interrupt: PIRQG */
-	pci_write_config_byte(dev, 0x8c, 0x0e);
+	/* Audio controller, slot 29, pin 1 */
+	ULI1575_SET_DEV_IRQ(29, 1, 0x8a);
 
-	/* Serial ATA interrupt: PIRQD */
-	pci_write_config_byte(dev, 0x8d, 0x0b);
+	/* Modem controller, slot 29, pin 2 */
+	ULI1575_SET_DEV_IRQ(29, 2, 0x8b);
 
-	/* SMB interrupt: PIRQH */
-	pci_write_config_byte(dev, 0x8e, 0x0f);
+	/* HD audio controller, slot 29, pin 3 */
+	ULI1575_SET_DEV_IRQ(29, 3, 0x8c);
 
-	/* PMU ACPI SCI interrupt: PIRQH */
-	pci_write_config_byte(dev, 0x8f, 0x0f);
+	/* SMB interrupt: slot 30, pin 1 */
+	ULI1575_SET_DEV_IRQ(30, 1, 0x8e);
+
+	/* PMU ACPI SCI interrupt: slot 30, pin 2 */
+	ULI1575_SET_DEV_IRQ(30, 2, 0x8f);
+
+	/* Serial ATA interrupt: slot 31, pin 1 */
+	ULI1575_SET_DEV_IRQ(31, 1, 0x8d);
 
 	/* Primary PATA IDE IRQ: 14
 	 * Secondary PATA IDE IRQ: 15
 	 */
-	pci_write_config_byte(dev, 0x44, 0x3d);
-	pci_write_config_byte(dev, 0x75, 0x0f);
+	pci_write_config_byte(dev, 0x44, 0x30 | uli1575_irq_route_table[14]);
+	pci_write_config_byte(dev, 0x75, uli1575_irq_route_table[15]);
 
 	/* Set IRQ14 and IRQ15 to legacy IRQs */
 	pci_read_config_word(dev, 0x46, &temp);
@@ -264,6 +282,8 @@ static void __devinit quirk_ali1575(struct pci_dev *dev)
 	 */
 	outb(0xfa, 0x4d0);
 	outb(0x1e, 0x4d1);
+
+#undef ULI1575_SET_DEV_IRQ
 }
 
 static void __devinit quirk_uli5288(struct pci_dev *dev)
@@ -306,7 +326,7 @@ static void __devinit early_uli5249(struct pci_dev *dev)
 	dev->class |= 0x1;
 }
 
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, quirk_ali1575);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, quirk_uli1575);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5288, quirk_uli5288);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AL, 0x5249, early_uli5249);
@@ -337,8 +357,6 @@ mpc86xx_hpcn_setup_arch(void)
 	for (np = NULL; (np = of_find_node_by_type(np, "pci")) != NULL;)
 		add_bridge(np);
 
-	ppc_md.pci_swizzle = common_swizzle;
-	ppc_md.pci_map_irq = mpc86xx_map_irq;
 	ppc_md.pci_exclude_device = mpc86xx_exclude_device;
 #endif
 
@@ -377,6 +395,15 @@ mpc86xx_hpcn_show_cpuinfo(struct seq_file *m)
 }
 
 
+void __init mpc86xx_hpcn_pcibios_fixup(void)
+{
+	struct pci_dev *dev = NULL;
+
+	for_each_pci_dev(dev)
+		pci_read_irq_line(dev);
+}
+
+
 /*
  * Called very early, device-tree isn't unflattened
  */
@@ -431,6 +458,7 @@ define_machine(mpc86xx_hpcn) {
 	.setup_arch		= mpc86xx_hpcn_setup_arch,
 	.init_IRQ		= mpc86xx_hpcn_init_irq,
 	.show_cpuinfo		= mpc86xx_hpcn_show_cpuinfo,
+	.pcibios_fixup		= mpc86xx_hpcn_pcibios_fixup,
 	.get_irq		= mpic_get_irq,
 	.restart		= mpc86xx_restart,
 	.time_init		= mpc86xx_time_init,
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index 12b6560..ef10bcf 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -85,11 +85,8 @@ static int __init gfar_mdio_of_init(void)
 			mdio_data.irq[k] = -1;
 
 		while ((child = of_get_next_child(np, child)) != NULL) {
-			if (child->n_intrs) {
-				u32 *id =
-				    (u32 *) get_property(child, "reg", NULL);
-				mdio_data.irq[*id] = child->intrs[0].line;
-			}
+			u32 *id = get_property(child, "reg", NULL);
+			mdio_data.irq[*id] = irq_of_parse_and_map(child, 0);
 		}
 
 		ret =
@@ -131,6 +128,7 @@ static int __init gfar_of_init(void)
 		char *model;
 		void *mac_addr;
 		phandle *ph;
+		int n_res = 1;
 
 		memset(r, 0, sizeof(r));
 		memset(&gfar_data, 0, sizeof(gfar_data));
@@ -139,8 +137,7 @@ static int __init gfar_of_init(void)
 		if (ret)
 			goto err;
 
-		r[1].start = np->intrs[0].line;
-		r[1].end = np->intrs[0].line;
+		r[1].start = r[1].end = irq_of_parse_and_map(np, 0);
 		r[1].flags = IORESOURCE_IRQ;
 
 		model = get_property(np, "model", NULL);
@@ -150,19 +147,19 @@ static int __init gfar_of_init(void)
 			r[1].name = gfar_tx_intr;
 
 			r[2].name = gfar_rx_intr;
-			r[2].start = np->intrs[1].line;
-			r[2].end = np->intrs[1].line;
+			r[2].start = r[2].end = irq_of_parse_and_map(np, 1);
 			r[2].flags = IORESOURCE_IRQ;
 
 			r[3].name = gfar_err_intr;
-			r[3].start = np->intrs[2].line;
-			r[3].end = np->intrs[2].line;
+			r[3].start = r[3].end = irq_of_parse_and_map(np, 2);
 			r[3].flags = IORESOURCE_IRQ;
+
+			n_res += 2;
 		}
 
 		gfar_dev =
 		    platform_device_register_simple("fsl-gianfar", i, &r[0],
-						    np->n_intrs + 1);
+						    n_res + 1);
 
 		if (IS_ERR(gfar_dev)) {
 			ret = PTR_ERR(gfar_dev);
@@ -259,8 +256,7 @@ static int __init fsl_i2c_of_init(void)
 		if (ret)
 			goto err;
 
-		r[1].start = np->intrs[0].line;
-		r[1].end = np->intrs[0].line;
+		r[1].start = r[1].end = irq_of_parse_and_map(np, 0);
 		r[1].flags = IORESOURCE_IRQ;
 
 		i2c_dev = platform_device_register_simple("fsl-i2c", i, r, 2);
@@ -396,8 +392,7 @@ static int __init fsl_usb_of_init(void)
 		if (ret)
 			goto err;
 
-		r[1].start = np->intrs[0].line;
-		r[1].end = np->intrs[0].line;
+		r[1].start = r[1].end = irq_of_parse_and_map(np, 0);
 		r[1].flags = IORESOURCE_IRQ;
 
 		usb_dev_mph =
@@ -445,8 +440,7 @@ static int __init fsl_usb_of_init(void)
 		if (ret)
 			goto unreg_mph;
 
-		r[1].start = np->intrs[0].line;
-		r[1].end = np->intrs[0].line;
+		r[1].start = r[1].end = irq_of_parse_and_map(np, 0);
 		r[1].flags = IORESOURCE_IRQ;
 
 		usb_dev_dr =
-- 
cgit v0.10.2


From aa74a30be971c632d734e487df42278b1cf85151 Mon Sep 17 00:00:00 2001
From: Andy Fleming <afleming@freescale.com>
Date: Mon, 21 Aug 2006 14:29:28 -0500
Subject: [POWERPC] Fix FEC node in 8540 ADS dts

* Fixed the FEC node, and its accompanying PHY
* Fixed a spacing issue in the PIC node

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/boot/dts/mpc8540ads.dts b/arch/powerpc/boot/dts/mpc8540ads.dts
index 93d2c2d..5f41c1f 100644
--- a/arch/powerpc/boot/dts/mpc8540ads.dts
+++ b/arch/powerpc/boot/dts/mpc8540ads.dts
@@ -83,11 +83,11 @@
 				reg = <1>;
 				device_type = "ethernet-phy";
 			};
-			ethernet-phy@2 {
-				linux,phandle = <2452002>;
+			ethernet-phy@3 {
+				linux,phandle = <2452003>;
 				interrupt-parent = <40000>;
 				interrupts = <37 1>;
-				reg = <2>;
+				reg = <3>;
 				device_type = "ethernet-phy";
 			};
 		};
@@ -124,14 +124,14 @@
 			#address-cells = <1>;
 			#size-cells = <0>;
 			device_type = "network";
-			model = "TSEC";
+			model = "FEC";
 			compatible = "gianfar";
 			reg = <26000 1000>;
 			address = [ 00 E0 0C 00 73 02 ];
 			local-mac-address = [ 00 E0 0C 00 73 02 ];
 			interrupts = <19 2>;
 			interrupt-parent = <40000>;
-			phy-handle = <2452002>;
+			phy-handle = <2452003>;
 		};
 
 		serial@4500 {
@@ -251,7 +251,7 @@
 			built-in;
 			compatible = "chrp,open-pic";
 			device_type = "open-pic";
-                        big-endian;
+			big-endian;
 		};
 	};
 };
-- 
cgit v0.10.2


From 5db9fa9593e2ff69f2b95f9d59229dc4faaa564d Mon Sep 17 00:00:00 2001
From: Nathan Lynch <ntl@pobox.com>
Date: Tue, 22 Aug 2006 20:36:05 -0500
Subject: [POWERPC] Fix gettimeofday inaccuracies

There are two problems in the powerpc gettimeofday code which can
cause incorrect results to be returned.

The first is that there is a race between do_gettimeofday and the
timer interrupt:

1. do_gettimeofday does get_tb()

2. decrementer exception on boot cpu which runs timer_recalc_offset,
   which also samples the timebase and updates the do_gtod structure
   with a greater timebase value.

3. do_gettimeofday calls __do_gettimeofday, which then computes a
   negative result for tb_val - temp_varp->tb_orig_stamp.

The second is caused by taking the boot cpu offline, which can cause
the value of tb_last_jiffy to be increased past the currently
available timebase, causing the same underflow as above.

[paulus@samba.org - define and use data_barrier() instead of mb().]

Signed-off-by: Nathan Lynch <ntl@pobox.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 774c0a3..18e59e4 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -417,7 +417,7 @@ static __inline__ void timer_check_rtc(void)
 /*
  * This version of gettimeofday has microsecond resolution.
  */
-static inline void __do_gettimeofday(struct timeval *tv, u64 tb_val)
+static inline void __do_gettimeofday(struct timeval *tv)
 {
 	unsigned long sec, usec;
 	u64 tb_ticks, xsec;
@@ -431,7 +431,12 @@ static inline void __do_gettimeofday(struct timeval *tv, u64 tb_val)
 	 * without a divide (and in fact, without a multiply)
 	 */
 	temp_varp = do_gtod.varp;
-	tb_ticks = tb_val - temp_varp->tb_orig_stamp;
+
+	/* Sampling the time base must be done after loading
+	 * do_gtod.varp in order to avoid racing with update_gtod.
+	 */
+	data_barrier(temp_varp);
+	tb_ticks = get_tb() - temp_varp->tb_orig_stamp;
 	temp_tb_to_xs = temp_varp->tb_to_xs;
 	temp_stamp_xsec = temp_varp->stamp_xsec;
 	xsec = temp_stamp_xsec + mulhdu(tb_ticks, temp_tb_to_xs);
@@ -464,7 +469,7 @@ void do_gettimeofday(struct timeval *tv)
 		tv->tv_usec = usec;
 		return;
 	}
-	__do_gettimeofday(tv, get_tb());
+	__do_gettimeofday(tv);
 }
 
 EXPORT_SYMBOL(do_gettimeofday);
@@ -650,6 +655,7 @@ void timer_interrupt(struct pt_regs * regs)
 	int next_dec;
 	int cpu = smp_processor_id();
 	unsigned long ticks;
+	u64 tb_next_jiffy;
 
 #ifdef CONFIG_PPC32
 	if (atomic_read(&ppc_n_lost_interrupts) != 0)
@@ -691,11 +697,14 @@ void timer_interrupt(struct pt_regs * regs)
 			continue;
 
 		write_seqlock(&xtime_lock);
-		tb_last_jiffy += tb_ticks_per_jiffy;
-		tb_last_stamp = per_cpu(last_jiffy, cpu);
-		do_timer(regs);
-		timer_recalc_offset(tb_last_jiffy);
-		timer_check_rtc();
+		tb_next_jiffy = tb_last_jiffy + tb_ticks_per_jiffy;
+		if (per_cpu(last_jiffy, cpu) >= tb_next_jiffy) {
+			tb_last_jiffy = tb_next_jiffy;
+			tb_last_stamp = per_cpu(last_jiffy, cpu);
+			do_timer(regs);
+			timer_recalc_offset(tb_last_jiffy);
+			timer_check_rtc();
+		}
 		write_sequnlock(&xtime_lock);
 	}
 	
diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h
index 7307aa7..4c9f522 100644
--- a/include/asm-powerpc/system.h
+++ b/include/asm-powerpc/system.h
@@ -53,6 +53,15 @@
 #define smp_read_barrier_depends()	do { } while(0)
 #endif /* CONFIG_SMP */
 
+/*
+ * This is a barrier which prevents following instructions from being
+ * started until the value of the argument x is known.  For example, if
+ * x is a variable loaded from memory, this prevents following
+ * instructions from being executed until the load has been performed.
+ */
+#define data_barrier(x)	\
+	asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory");
+
 struct task_struct;
 struct pt_regs;
 
-- 
cgit v0.10.2


From 6cdd2bdfb9e2449f1c8a0f729cdf9cfd733dd63f Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 23 Aug 2006 11:45:12 +1000
Subject: [POWERPC] Fix BootX booting with an initrd

The bootx_init.c trampoline didn't properly add the ramdisk to the
"reserve map" (list of reserved areas of memory), thus causing all sorts
of failures when using BootX with an initrd. Also fixes a possible
problem if the ramdisk is located before the device-tree passed by
BootX.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index 6a026c7..9d73d02 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -411,8 +411,15 @@ static unsigned long __init bootx_flatten_dt(unsigned long start)
 	DBG("End of boot params: %x\n", mem_end);
 	rsvmap[0] = mem_start;
 	rsvmap[1] = mem_end;
-	rsvmap[2] = 0;
-	rsvmap[3] = 0;
+	if (bootx_info->ramDisk) {
+		rsvmap[2] = ((unsigned long)bootx_info) + bootx_info->ramDisk;
+		rsvmap[3] = rsvmap[2] + bootx_info->ramDiskSize;
+		rsvmap[4] = 0;
+		rsvmap[5] = 0;
+	} else {
+		rsvmap[2] = 0;
+		rsvmap[3] = 0;
+	}
 
 	return (unsigned long)hdr;
 }
@@ -543,12 +550,12 @@ void __init bootx_init(unsigned long r3, unsigned long r4)
 	 */
 	if (bi->version < 5) {
 		space = bi->deviceTreeOffset + bi->deviceTreeSize;
-		if (bi->ramDisk)
+		if (bi->ramDisk >= space)
 			space = bi->ramDisk + bi->ramDiskSize;
 	} else
 		space = bi->totalParamsSize;
 
-	bootx_printf("Total space used by parameters & ramdisk: %x \n", space);
+	bootx_printf("Total space used by parameters & ramdisk: 0x%x \n", space);
 
 	/* New BootX will have flushed all TLBs and enters kernel with
 	 * MMU switched OFF, so this should not be useful anymore.
-- 
cgit v0.10.2


From c4342ff92bed26f2e0e3543a9f0fdc5f3a29b8fd Mon Sep 17 00:00:00 2001
From: Zang Roy-r61911 <tie-fei.zang@freescale.com>
Date: Wed, 23 Aug 2006 10:19:50 +0800
Subject: [POWERPC] Update mpc7448hpc2 board irq support using device tree

The patch rewrites mpc7448hpc2 board irq support according to the new
mpic device tree interface.

Signed-off-by: Roy Zang	<tie-fei.zang@freescale.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index d7a4fc7..ed00ed2 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -1,7 +1,7 @@
 /*
  * mpc7448_hpc2.c
  *
- * Board setup routines for the Freescale Taiga platform
+ * Board setup routines for the Freescale mpc7448hpc2(taiga) platform
  *
  * Author: Jacob Pan
  *	 jacob.pan@freescale.com
@@ -12,10 +12,10 @@
  *
  * Copyright 2004-2006 Freescale Semiconductor, Inc.
  *
- * This file is licensed under
- * the terms of the GNU General Public License version 2.  This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/config.h>
@@ -62,43 +62,8 @@ pci_dram_offset = MPC7448_HPC2_PCI_MEM_OFFSET;
 extern int tsi108_setup_pci(struct device_node *dev);
 extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val);
 extern void tsi108_pci_int_init(void);
-extern int tsi108_irq_cascade(struct pt_regs *regs, void *unused);
-
-/*
- * Define all of the IRQ senses and polarities.  Taken from the
- * mpc7448hpc  manual.
- * Note:  Likely, this table and the following function should be
- *        obtained and derived from the OF Device Tree.
- */
-
-static u_char mpc7448_hpc2_pic_initsenses[] __initdata = {
-	/* External on-board sources */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* INT[0] XINT0 from FPGA */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* INT[1] XINT1 from FPGA */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* INT[2] PHY_INT from both GIGE */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE),	/* INT[3] RESERVED */
-	/* Internal Tsi108/109 interrupt sources */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* Reserved IRQ */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* Reserved IRQ */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* Reserved IRQ */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* Reserved IRQ */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* DMA0 */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* DMA1 */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* DMA2 */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* DMA3 */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* UART0 */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* UART1 */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* I2C */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* GPIO */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* GIGE0 */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* GIGE1 */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* Reserved IRQ */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* HLP */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* SDC */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* Processor IF */
-	(IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE),	/* Reserved IRQ */
-	(IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE),	/* PCI/X block */
-};
+extern void tsi108_irq_cascade(unsigned int irq, struct irq_desc *desc,
+			    struct pt_regs *regs);
 
 int mpc7448_hpc2_exclude_device(u_char bus, u_char devfn)
 {
@@ -229,6 +194,8 @@ static void __init mpc7448_hpc2_init_IRQ(void)
 {
 	struct mpic *mpic;
 	phys_addr_t mpic_paddr = 0;
+	unsigned int cascade_pci_irq;
+	struct device_node *tsi_pci;
 	struct device_node *tsi_pic;
 
 	tsi_pic = of_find_node_by_type(NULL, "open-pic");
@@ -246,24 +213,31 @@ static void __init mpc7448_hpc2_init_IRQ(void)
 	DBG("%s: tsi108pic phys_addr = 0x%x\n", __FUNCTION__,
 	    (u32) mpic_paddr);
 
-	mpic = mpic_alloc(mpic_paddr,
+	mpic = mpic_alloc(tsi_pic, mpic_paddr,
 			MPIC_PRIMARY | MPIC_BIG_ENDIAN | MPIC_WANTS_RESET |
 			MPIC_SPV_EOI | MPIC_MOD_ID(MPIC_ID_TSI108),
 			0, /* num_sources used */
-			TSI108_IRQ_BASE,
 			0, /* num_sources used */
-			NR_IRQS - 4 /* XXXX */,
-			mpc7448_hpc2_pic_initsenses,
-			sizeof(mpc7448_hpc2_pic_initsenses), "Tsi108_PIC");
+			"Tsi108_PIC");
 
 	BUG_ON(mpic == NULL); /* XXXX */
-
 	mpic_init(mpic);
-	mpic_setup_cascade(IRQ_TSI108_PCI, tsi108_irq_cascade, mpic);
+
+	tsi_pci = of_find_node_by_type(NULL, "pci");
+	if (tsi_pci == 0) {
+		printk("%s: No tsi108 pci node found !\n", __FUNCTION__);
+		return;
+	}
+
+	cascade_pci_irq = irq_of_parse_and_map(tsi_pci, 0);
+	set_irq_data(cascade_pci_irq, mpic);
+	set_irq_chained_handler(cascade_pci_irq, tsi108_irq_cascade);
+
 	tsi108_pci_int_init();
 
 	/* Configure MPIC outputs to CPU0 */
 	tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0);
+	of_node_put(tsi_pic);
 }
 
 void mpc7448_hpc2_show_cpuinfo(struct seq_file *m)
@@ -320,6 +294,7 @@ static int mpc7448_machine_check_exception(struct pt_regs *regs)
 	return 0;
 
 }
+
 define_machine(mpc7448_hpc2){
 	.name 			= "MPC7448 HPC2",
 	.probe 			= mpc7448_hpc2_probe,
diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c
index 26a0cc8..f303846 100644
--- a/arch/powerpc/sysdev/tsi108_dev.c
+++ b/arch/powerpc/sysdev/tsi108_dev.c
@@ -93,13 +93,15 @@ static int __init tsi108_eth_of_init(void)
 			goto err;
 
 		r[1].name = "tx";
-		r[1].start = np->intrs[0].line;
-		r[1].end = np->intrs[0].line;
+		r[1].start = irq_of_parse_and_map(np, 0);
+		r[1].end = irq_of_parse_and_map(np, 0);
 		r[1].flags = IORESOURCE_IRQ;
+		DBG("%s: name:start->end = %s:0x%lx-> 0x%lx\n",
+			__FUNCTION__,r[1].name, r[1].start, r[1].end);
 
 		tsi_eth_dev =
 		    platform_device_register_simple("tsi-ethernet", i, &r[0],
-						    np->n_intrs + 1);
+						    1);
 
 		if (IS_ERR(tsi_eth_dev)) {
 			ret = PTR_ERR(tsi_eth_dev);
@@ -127,7 +129,7 @@ static int __init tsi108_eth_of_init(void)
 		tsi_eth_data.regs = r[0].start;
 		tsi_eth_data.phyregs = res.start;
 		tsi_eth_data.phy = *phy_id;
-		tsi_eth_data.irq_num = np->intrs[0].line;
+		tsi_eth_data.irq_num = irq_of_parse_and_map(np, 0);
 		of_node_put(phy);
 		ret =
 		    platform_device_add_data(tsi_eth_dev, &tsi_eth_data,
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
index 3265d54..2ab06ed 100644
--- a/arch/powerpc/sysdev/tsi108_pci.c
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -26,7 +26,6 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 
-
 #include <asm/byteorder.h>
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -228,7 +227,7 @@ int __init tsi108_setup_pci(struct device_node *dev)
 
 	(hose)->ops = &tsi108_direct_pci_ops;
 
-	printk(KERN_INFO "Found tsi108 PCI host bridge at 0x%08lx. "
+	printk(KERN_INFO "Found tsi108 PCI host bridge at 0x%08x. "
 	       "Firmware bus number: %d->%d\n",
 	       rsrc.start, hose->first_busno, hose->last_busno);
 
@@ -278,7 +277,7 @@ static void init_pci_source(void)
 	mb();
 }
 
-static inline int get_pci_source(void)
+static inline unsigned int get_pci_source(void)
 {
 	u_int temp = 0;
 	int irq = -1;
@@ -371,12 +370,12 @@ static void tsi108_pci_irq_end(u_int irq)
  * Interrupt controller descriptor for cascaded PCI interrupt controller.
  */
 
-struct hw_interrupt_type tsi108_pci_irq = {
+static struct irq_chip tsi108_pci_irq = {
 	.typename = "tsi108_PCI_int",
-	.enable = tsi108_pci_irq_enable,
-	.disable = tsi108_pci_irq_disable,
+	.mask = tsi108_pci_irq_disable,
 	.ack = tsi108_pci_irq_ack,
 	.end = tsi108_pci_irq_end,
+	.unmask = tsi108_pci_irq_enable,
 };
 
 /*
@@ -399,14 +398,18 @@ void __init tsi108_pci_int_init(void)
 	DBG("Tsi108_pci_int_init: initializing PCI interrupts\n");
 
 	for (i = 0; i < NUM_PCI_IRQS; i++) {
-		irq_desc[i + IRQ_PCI_INTAD_BASE].handler = &tsi108_pci_irq;
+		irq_desc[i + IRQ_PCI_INTAD_BASE].chip = &tsi108_pci_irq;
 		irq_desc[i + IRQ_PCI_INTAD_BASE].status |= IRQ_LEVEL;
 	}
 
 	init_pci_source();
 }
 
-int tsi108_irq_cascade(struct pt_regs *regs, void *unused)
+void tsi108_irq_cascade(unsigned int irq, struct irq_desc *desc,
+			    struct pt_regs *regs)
 {
-	return get_pci_source();
+	unsigned int cascade_irq = get_pci_source();
+	if (cascade_irq != NO_IRQ)
+		generic_handle_irq(cascade_irq, regs);
+	desc->chip->eoi(irq);
 }
diff --git a/include/asm-powerpc/tsi108.h b/include/asm-powerpc/tsi108.h
index c4c278d..2c702d3 100644
--- a/include/asm-powerpc/tsi108.h
+++ b/include/asm-powerpc/tsi108.h
@@ -1,16 +1,18 @@
 /*
- * include/asm-ppc/tsi108.h
- *
  * common routine and memory layout for Tundra TSI108(Grendel) host bridge
  * memory controller.
  *
  * Author: Jacob Pan (jacob.pan@freescale.com)
  *	   Alex Bounine (alexandreb@tundra.com)
- * 2004 (c) Freescale Semiconductor Inc.  This file is licensed under
- * the terms of the GNU General Public License version 2.  This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
+ *
+ * Copyright 2004-2006 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
  */
+
 #ifndef __PPC_KERNEL_TSI108_H
 #define __PPC_KERNEL_TSI108_H
 
diff --git a/include/asm-powerpc/tsi108_irq.h b/include/asm-powerpc/tsi108_irq.h
new file mode 100644
index 0000000..3e4d04e
--- /dev/null
+++ b/include/asm-powerpc/tsi108_irq.h
@@ -0,0 +1,124 @@
+/*
+ * (C) Copyright 2005 Tundra Semiconductor Corp.
+ * Alex Bounine, <alexandreb at tundra.com>.
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+/*
+ * definitions for interrupt controller initialization and external interrupt
+ * demultiplexing on TSI108EMU/SVB boards.
+ */
+
+#ifndef _ASM_PPC_TSI108_IRQ_H
+#define _ASM_PPC_TSI108_IRQ_H
+
+/*
+ * Tsi108 interrupts
+ */
+#ifndef TSI108_IRQ_REG_BASE
+#define TSI108_IRQ_REG_BASE		0
+#endif
+
+#define TSI108_IRQ(x)		(TSI108_IRQ_REG_BASE + (x))
+
+#define TSI108_MAX_VECTORS	(36 + 4)	/* 36 sources + PCI INT demux */
+#define MAX_TASK_PRIO	0xF
+
+#define TSI108_IRQ_SPURIOUS	(TSI108_MAX_VECTORS)
+
+#define DEFAULT_PRIO_LVL	10	/* initial priority level */
+
+/* Interrupt vectors assignment to external and internal
+ * sources of requests. */
+
+/* EXTERNAL INTERRUPT SOURCES */
+
+#define IRQ_TSI108_EXT_INT0	TSI108_IRQ(0)	/* External Source at INT[0] */
+#define IRQ_TSI108_EXT_INT1	TSI108_IRQ(1)	/* External Source at INT[1] */
+#define IRQ_TSI108_EXT_INT2	TSI108_IRQ(2)	/* External Source at INT[2] */
+#define IRQ_TSI108_EXT_INT3	TSI108_IRQ(3)	/* External Source at INT[3] */
+
+/* INTERNAL INTERRUPT SOURCES */
+
+#define IRQ_TSI108_RESERVED0	TSI108_IRQ(4)	/* Reserved IRQ */
+#define IRQ_TSI108_RESERVED1	TSI108_IRQ(5)	/* Reserved IRQ */
+#define IRQ_TSI108_RESERVED2	TSI108_IRQ(6)	/* Reserved IRQ */
+#define IRQ_TSI108_RESERVED3	TSI108_IRQ(7)	/* Reserved IRQ */
+#define IRQ_TSI108_DMA0		TSI108_IRQ(8)	/* DMA0 */
+#define IRQ_TSI108_DMA1		TSI108_IRQ(9)	/* DMA1 */
+#define IRQ_TSI108_DMA2		TSI108_IRQ(10)	/* DMA2 */
+#define IRQ_TSI108_DMA3		TSI108_IRQ(11)	/* DMA3 */
+#define IRQ_TSI108_UART0	TSI108_IRQ(12)	/* UART0 */
+#define IRQ_TSI108_UART1	TSI108_IRQ(13)	/* UART1 */
+#define IRQ_TSI108_I2C		TSI108_IRQ(14)	/* I2C */
+#define IRQ_TSI108_GPIO		TSI108_IRQ(15)	/* GPIO */
+#define IRQ_TSI108_GIGE0	TSI108_IRQ(16)	/* GIGE0 */
+#define IRQ_TSI108_GIGE1	TSI108_IRQ(17)	/* GIGE1 */
+#define IRQ_TSI108_RESERVED4	TSI108_IRQ(18)	/* Reserved IRQ */
+#define IRQ_TSI108_HLP		TSI108_IRQ(19)	/* HLP */
+#define IRQ_TSI108_SDRAM	TSI108_IRQ(20)	/* SDC */
+#define IRQ_TSI108_PROC_IF	TSI108_IRQ(21)	/* Processor IF */
+#define IRQ_TSI108_RESERVED5	TSI108_IRQ(22)	/* Reserved IRQ */
+#define IRQ_TSI108_PCI		TSI108_IRQ(23)	/* PCI/X block */
+
+#define IRQ_TSI108_MBOX0	TSI108_IRQ(24)	/* Mailbox 0 register */
+#define IRQ_TSI108_MBOX1	TSI108_IRQ(25)	/* Mailbox 1 register */
+#define IRQ_TSI108_MBOX2	TSI108_IRQ(26)	/* Mailbox 2 register */
+#define IRQ_TSI108_MBOX3	TSI108_IRQ(27)	/* Mailbox 3 register */
+
+#define IRQ_TSI108_DBELL0	TSI108_IRQ(28)	/* Doorbell 0 */
+#define IRQ_TSI108_DBELL1	TSI108_IRQ(29)	/* Doorbell 1 */
+#define IRQ_TSI108_DBELL2	TSI108_IRQ(30)	/* Doorbell 2 */
+#define IRQ_TSI108_DBELL3	TSI108_IRQ(31)	/* Doorbell 3 */
+
+#define IRQ_TSI108_TIMER0	TSI108_IRQ(32)	/* Global Timer 0 */
+#define IRQ_TSI108_TIMER1	TSI108_IRQ(33)	/* Global Timer 1 */
+#define IRQ_TSI108_TIMER2	TSI108_IRQ(34)	/* Global Timer 2 */
+#define IRQ_TSI108_TIMER3	TSI108_IRQ(35)	/* Global Timer 3 */
+
+/*
+ * PCI bus INTA# - INTD# lines demultiplexor
+ */
+#define IRQ_PCI_INTAD_BASE	TSI108_IRQ(36)
+#define IRQ_PCI_INTA		(IRQ_PCI_INTAD_BASE + 0)
+#define IRQ_PCI_INTB		(IRQ_PCI_INTAD_BASE + 1)
+#define IRQ_PCI_INTC		(IRQ_PCI_INTAD_BASE + 2)
+#define IRQ_PCI_INTD		(IRQ_PCI_INTAD_BASE + 3)
+#define NUM_PCI_IRQS		(4)
+
+/* number of entries in vector dispatch table */
+#define IRQ_TSI108_TAB_SIZE	(TSI108_MAX_VECTORS + 1)
+
+/* Mapping of MPIC outputs to processors' interrupt pins */
+
+#define IDIR_INT_OUT0		0x1
+#define IDIR_INT_OUT1		0x2
+#define IDIR_INT_OUT2		0x4
+#define IDIR_INT_OUT3		0x8
+
+/*---------------------------------------------------------------
+ * IRQ line configuration parameters */
+
+/* Interrupt delivery modes */
+typedef enum {
+	TSI108_IRQ_DIRECTED,
+	TSI108_IRQ_DISTRIBUTED,
+} TSI108_IRQ_MODE;
+#endif				/*  _ASM_PPC_TSI108_IRQ_H */
-- 
cgit v0.10.2


From be9633e96a9e33ab97776bb6874244fa4db6987d Mon Sep 17 00:00:00 2001
From: Zang Roy-r61911 <tie-fei.zang@freescale.com>
Date: Wed, 23 Aug 2006 10:20:27 +0800
Subject: [POWERPC] Pass UPIO_TSI flag to 8250 serial driver

This patch passes the UPIO_TSI flag to the generic 8250 serial driver.

Signed-off-by: Roy Zang	<tie-fei.zang@freescale.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 359ab89..40a3929 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -115,6 +115,7 @@ static int __init add_legacy_soc_port(struct device_node *np,
 	u64 addr;
 	u32 *addrp;
 	upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ;
+	struct device_node *tsi = of_get_parent(np);
 
 	/* We only support ports that have a clock frequency properly
 	 * encoded in the device-tree.
@@ -134,7 +135,10 @@ static int __init add_legacy_soc_port(struct device_node *np,
 	/* Add port, irq will be dealt with later. We passed a translated
 	 * IO port value. It will be fixed up later along with the irq
 	 */
-	return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags, 0);
+	if (tsi && !strcmp(tsi->type, "tsi-bridge"))
+		return add_legacy_port(np, -1, UPIO_TSI, addr, addr, NO_IRQ, flags, 0);
+	else
+		return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags, 0);
 }
 
 static int __init add_legacy_isa_port(struct device_node *np,
@@ -464,7 +468,7 @@ static int __init serial_dev_init(void)
 			fixup_port_irq(i, np, port);
 		if (port->iotype == UPIO_PORT)
 			fixup_port_pio(i, np, port);
-		if (port->iotype == UPIO_MEM)
+		if ((port->iotype == UPIO_MEM) || (port->iotype == UPIO_TSI))
 			fixup_port_mmio(i, np, port);
 	}
 
-- 
cgit v0.10.2


From 87589f08beaec3e1f8a3af0c72406c845f706821 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 23 Aug 2006 16:58:39 +1000
Subject: [POWERPC] Correct masks used in emulating some instructions

When we get an illegal instruction exception, we check to see whether
the instruction is one that we emulate for the user program.  Some of
the masks we use in checking whether the offending instruction is one
we care about didn't have the top bit set, which is the MSB of the
major opcode.  Thus some undefined opcodes could get emulated as other
(defined but unimplemented) instructions.  This corrects the masks.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 125761a..4d0b4e7 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -585,14 +585,14 @@ static void parse_fpe(struct pt_regs *regs)
 #define INST_MFSPR_PVR_MASK	0xfc1fffff
 
 #define INST_DCBA		0x7c0005ec
-#define INST_DCBA_MASK		0x7c0007fe
+#define INST_DCBA_MASK		0xfc0007fe
 
 #define INST_MCRXR		0x7c000400
-#define INST_MCRXR_MASK		0x7c0007fe
+#define INST_MCRXR_MASK		0xfc0007fe
 
 #define INST_STRING		0x7c00042a
-#define INST_STRING_MASK	0x7c0007fe
-#define INST_STRING_GEN_MASK	0x7c00067e
+#define INST_STRING_MASK	0xfc0007fe
+#define INST_STRING_GEN_MASK	0xfc00067e
 #define INST_LSWI		0x7c0004aa
 #define INST_LSWX		0x7c00042a
 #define INST_STSWI		0x7c0005aa
-- 
cgit v0.10.2


From d55c4a76f26160482158cd43788dcfc96a320a4f Mon Sep 17 00:00:00 2001
From: Li Yang <leoli@freescale.com>
Date: Wed, 23 Aug 2006 14:13:08 +0800
Subject: [POWERPC] Fix compile problem without CONFIG_PCI

Compilation fails when CONFIG_PCI is not defined.
This patch fixes that.

[paulus@samba.org: Moved of_irq_pci_swizzle so we only need one #ifdef]

Signed-off-by: Li Yang <leoli@freescale.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index 6a7e997c..11052c2 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -598,11 +598,6 @@ static struct device_node *of_irq_find_parent(struct device_node *child)
 	return p;
 }
 
-static u8 of_irq_pci_swizzle(u8 slot, u8 pin)
-{
-	return (((pin - 1) + slot) % 4) + 1;
-}
-
 /* This doesn't need to be called if you don't have any special workaround
  * flags to pass
  */
@@ -891,6 +886,12 @@ int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq
 }
 EXPORT_SYMBOL_GPL(of_irq_map_one);
 
+#ifdef CONFIG_PCI
+static u8 of_irq_pci_swizzle(u8 slot, u8 pin)
+{
+	return (((pin - 1) + slot) % 4) + 1;
+}
+
 int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
 {
 	struct device_node *dn, *ppnode;
@@ -967,4 +968,4 @@ int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
 	return of_irq_map_raw(ppnode, &lspec, laddr, out_irq);
 }
 EXPORT_SYMBOL_GPL(of_irq_map_pci);
-
+#endif /* CONFIG_PCI */
-- 
cgit v0.10.2


From 834ac73d4bc804db8ccb3f2a517e36db5f6bc4bd Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@mellanox.co.il>
Date: Tue, 22 Aug 2006 22:45:06 +0300
Subject: IB/mthca: Update HCA firmware revisions

Update the driver's list of HCA firmware revisions to make sure people
running Sinai firmware older than 1.1.0 get a message suggesting a
firmware upgrade.  Update the Arbel versions as well while we are at it.

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 557cde3..7b82c19 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -967,12 +967,12 @@ static struct {
 } mthca_hca_table[] = {
 	[TAVOR]        = { .latest_fw = MTHCA_FW_VER(3, 4, 0),
 			   .flags     = 0 },
-	[ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 400),
+	[ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 600),
 			   .flags     = MTHCA_FLAG_PCIE },
-	[ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0),
+	[ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 400),
 			   .flags     = MTHCA_FLAG_MEMFREE |
 					MTHCA_FLAG_PCIE },
-	[SINAI]        = { .latest_fw = MTHCA_FW_VER(1, 0, 800),
+	[SINAI]        = { .latest_fw = MTHCA_FW_VER(1, 1, 0),
 			   .flags     = MTHCA_FLAG_MEMFREE |
 					MTHCA_FLAG_PCIE    |
 					MTHCA_FLAG_SINAI_OPT }
-- 
cgit v0.10.2


From 25848c4e502ec2bb94ee1f3c82709311059d0857 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Wed, 23 Aug 2006 15:33:07 -0700
Subject: [SCSI] esp: Fix build on SUN4.

Noted by Alexey Dobriyan.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/scsi/esp.c b/drivers/scsi/esp.c
index 98bd227..5630868 100644
--- a/drivers/scsi/esp.c
+++ b/drivers/scsi/esp.c
@@ -1146,7 +1146,7 @@ static struct sbus_dev sun4_esp_dev;
 static int __init esp_sun4_probe(struct scsi_host_template *tpnt)
 {
 	if (sun4_esp_physaddr) {
-		memset(&sun4_esp_dev, 0, sizeof(esp_dev));
+		memset(&sun4_esp_dev, 0, sizeof(sun4_esp_dev));
 		sun4_esp_dev.reg_addrs[0].phys_addr = sun4_esp_physaddr;
 		sun4_esp_dev.irqs[0] = 4;
 		sun4_esp_dev.resource[0].start = sun4_esp_physaddr;
@@ -1162,6 +1162,7 @@ static int __init esp_sun4_probe(struct scsi_host_template *tpnt)
 
 static int __devexit esp_sun4_remove(void)
 {
+	struct of_device *dev = &sun4_esp_dev.ofdev;
 	struct esp *esp = dev_get_drvdata(&dev->dev);
 
 	return esp_remove_common(esp);
-- 
cgit v0.10.2


From 0f4184f73d5233214d3225a94fcf1bd4d6678d74 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@misterjones.org>
Date: Wed, 23 Aug 2006 15:50:57 -0700
Subject: [SERIAL] sunsab: Fix E250 console with RSC.

This fixes yet another sunsab problem, when console is set to anything
but the first port. The console framework calls sunsab_console_setup
for each port, and we end up setting up a console on a not yet
discovered port, which leads to an Oops. Instead, defer console setup
until the requested port is properly initialized. Tested on an E250
through an RSC console.

Reported by Daniel Smolik <marvin@mydatex.cz>

Signed-off-by: Marc Zyngier <maz@misterjones.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c
index dc673e1..cfe20f7 100644
--- a/drivers/serial/sunsab.c
+++ b/drivers/serial/sunsab.c
@@ -886,6 +886,15 @@ static int sunsab_console_setup(struct console *con, char *options)
 	unsigned long flags;
 	unsigned int baud, quot;
 
+	/*
+	 * The console framework calls us for each and every port
+	 * registered. Defer the console setup until the requested
+	 * port has been properly discovered. A bit of a hack,
+	 * though...
+	 */
+	if (up->port.type != PORT_SUNSAB)
+		return -1;
+
 	printk("Console: ttyS%d (SAB82532)\n",
 	       (sunsab_reg.minor - 64) + con->index);
 
-- 
cgit v0.10.2


From b8b99e857d0e258b0da17e55466e5142465d35fd Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Wed, 23 Aug 2006 15:53:39 -0700
Subject: [SERIAL] sunzilog: Mirror the sunsab serial setup bug fix.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/serial/sunzilog.c b/drivers/serial/sunzilog.c
index 47bc3d5..d34f336 100644
--- a/drivers/serial/sunzilog.c
+++ b/drivers/serial/sunzilog.c
@@ -1146,6 +1146,9 @@ static int __init sunzilog_console_setup(struct console *con, char *options)
 	unsigned long flags;
 	int baud, brg;
 
+	if (up->port.type != PORT_SUNZILOG)
+		return -1;
+
 	printk(KERN_INFO "Console: ttyS%d (SunZilog zs%d)\n",
 	       (sunzilog_reg.minor - 64) + con->index, con->index);
 
-- 
cgit v0.10.2


From c9169f8747bb282cbe518132bf7d49755a00b6c1 Mon Sep 17 00:00:00 2001
From: Adam Litke <agl@us.ibm.com>
Date: Fri, 18 Aug 2006 11:22:21 -0700
Subject: [POWERPC] hugepage BUG fix

On Tue, 2006-08-15 at 08:22 -0700, Dave Hansen wrote:
> kernel BUG in cache_free_debugcheck at mm/slab.c:2748!

Alright, this one is only triggered when slab debugging is enabled.  The
slabs are assumed to be aligned on a HUGEPTE_TABLE_SIZE boundary.  The free
path makes use of this assumption and uses the lowest nibble to pass around
an index into an array of kmem_cache pointers.  With slab debugging turned
on, the slab is still aligned, but the "working" object pointer is not.
This would break the assumption above that a full nibble is available for
the PGF_CACHENUM_MASK.

The following patch reduces PGF_CACHENUM_MASK to cover only the two least
significant bits, which is enough to cover the current number of 4 pgtable
cache types.  Then use this constant to mask out the appropriate part of
the huge pte pointer.

Signed-off-by: Adam Litke <agl@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 266b8b2..5615acc 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -153,7 +153,7 @@ static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
 	hpdp->pd = 0;
 	tlb->need_flush = 1;
 	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM,
-						 HUGEPTE_TABLE_SIZE-1));
+						 PGF_CACHENUM_MASK));
 }
 
 #ifdef CONFIG_PPC_64K_PAGES
diff --git a/include/asm-powerpc/pgalloc.h b/include/asm-powerpc/pgalloc.h
index 9f0917c..ae63db7 100644
--- a/include/asm-powerpc/pgalloc.h
+++ b/include/asm-powerpc/pgalloc.h
@@ -117,7 +117,7 @@ static inline void pte_free(struct page *ptepage)
 	pte_free_kernel(page_address(ptepage));
 }
 
-#define PGF_CACHENUM_MASK	0xf
+#define PGF_CACHENUM_MASK	0x3
 
 typedef struct pgtable_free {
 	unsigned long val;
-- 
cgit v0.10.2


From f3745a3f9fa39fa3c62f7d5b8549ee787d2c6848 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Tue, 22 Aug 2006 21:06:46 +0900
Subject: [PATCH] ata_piix: ignore PCS on ICH5

There have been a number of reports regarding some ICH5s failing to
detect devices since the PCS handling update.  Analysis shows that
these problems are caused by bogus PCS values from those controllers.

Before the PCS update, the driver didn't honor PCS regs exactly and
probed them in many cases where PCS reports no device.  Now that PCS is
honored exactly, these hardware problems are visible.

This patch makes ICH5 ignore PCS.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>

diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c
index 01b3530..3b98f18 100644
--- a/drivers/scsi/ata_piix.c
+++ b/drivers/scsi/ata_piix.c
@@ -390,7 +390,8 @@ static struct ata_port_info piix_port_info[] = {
 	/* ich5_sata */
 	{
 		.sht		= &piix_sht,
-		.host_flags	= ATA_FLAG_SATA | PIIX_FLAG_CHECKINTR,
+		.host_flags	= ATA_FLAG_SATA | PIIX_FLAG_CHECKINTR |
+				  PIIX_FLAG_IGNORE_PCS,
 		.pio_mask	= 0x1f,	/* pio0-4 */
 		.mwdma_mask	= 0x07, /* mwdma0-2 */
 		.udma_mask	= 0x7f,	/* udma0-6 */
-- 
cgit v0.10.2


From 9dd9c16465c82d1385f97d2a245641464fcb7894 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Tue, 22 Aug 2006 21:15:58 +0900
Subject: [PATCH] ata_piix: implement force_pcs module parameter

This patch implements force_pcs module parameter for ata_piix.  If 1,
PCS is ignored, 2 honored.  As there seem to be quite a few ICHs w/
impaired PCS, this option will be useful for cases where the default
setting doesn't work.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>

diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c
index 3b98f18..2d20caf 100644
--- a/drivers/scsi/ata_piix.c
+++ b/drivers/scsi/ata_piix.c
@@ -468,6 +468,11 @@ MODULE_LICENSE("GPL");
 MODULE_DEVICE_TABLE(pci, piix_pci_tbl);
 MODULE_VERSION(DRV_VERSION);
 
+static int force_pcs = 0;
+module_param(force_pcs, int, 0444);
+MODULE_PARM_DESC(force_pcs, "force honoring or ignoring PCS to work around "
+		 "device mis-detection (0=default, 1=ignore PCS, 2=honor PCS)");
+
 /**
  *	piix_pata_cbl_detect - Probe host controller cable detect info
  *	@ap: Port for which cable detect info is desired
@@ -812,6 +817,7 @@ static int __devinit piix_check_450nx_errata(struct pci_dev *ata_dev)
 }
 
 static void __devinit piix_init_pcs(struct pci_dev *pdev,
+				    struct ata_port_info *pinfo,
 				    const struct piix_map_db *map_db)
 {
 	u16 pcs, new_pcs;
@@ -825,6 +831,18 @@ static void __devinit piix_init_pcs(struct pci_dev *pdev,
 		pci_write_config_word(pdev, ICH5_PCS, new_pcs);
 		msleep(150);
 	}
+
+	if (force_pcs == 1) {
+		dev_printk(KERN_INFO, &pdev->dev,
+			   "force ignoring PCS (0x%x)\n", new_pcs);
+		pinfo[0].host_flags |= PIIX_FLAG_IGNORE_PCS;
+		pinfo[1].host_flags |= PIIX_FLAG_IGNORE_PCS;
+	} else if (force_pcs == 2) {
+		dev_printk(KERN_INFO, &pdev->dev,
+			   "force honoring PCS (0x%x)\n", new_pcs);
+		pinfo[0].host_flags &= ~PIIX_FLAG_IGNORE_PCS;
+		pinfo[1].host_flags &= ~PIIX_FLAG_IGNORE_PCS;
+	}
 }
 
 static void __devinit piix_init_sata_map(struct pci_dev *pdev,
@@ -933,7 +951,8 @@ static int piix_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (host_flags & ATA_FLAG_SATA) {
 		piix_init_sata_map(pdev, port_info,
 				   piix_map_db_table[ent->driver_data]);
-		piix_init_pcs(pdev, piix_map_db_table[ent->driver_data]);
+		piix_init_pcs(pdev, port_info,
+			      piix_map_db_table[ent->driver_data]);
 	}
 
 	/* On ICH5, some BIOSen disable the interrupt using the
-- 
cgit v0.10.2


From ac2164d5e425fa4755bdbab9641d8dab7239b6f5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Wed, 23 Aug 2006 01:00:27 +0900
Subject: [PATCH] sata_via: use old SCR access pattern on vt6420

vt6420 has super-fragile SCR registers which can hang the whole
machine if accessed with the wrong timings.  This patch makes sata_via
use SCR registers only during probing and with the same timings as
before (pre new EH), which is proven to work.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>

diff --git a/drivers/scsi/sata_via.c b/drivers/scsi/sata_via.c
index 03baec2..01d4036 100644
--- a/drivers/scsi/sata_via.c
+++ b/drivers/scsi/sata_via.c
@@ -74,6 +74,7 @@ enum {
 static int svia_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
 static u32 svia_scr_read (struct ata_port *ap, unsigned int sc_reg);
 static void svia_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val);
+static void vt6420_error_handler(struct ata_port *ap);
 
 static const struct pci_device_id svia_pci_tbl[] = {
 	{ 0x1106, 0x3149, PCI_ANY_ID, PCI_ANY_ID, 0, 0, vt6420 },
@@ -107,7 +108,38 @@ static struct scsi_host_template svia_sht = {
 	.bios_param		= ata_std_bios_param,
 };
 
-static const struct ata_port_operations svia_sata_ops = {
+static const struct ata_port_operations vt6420_sata_ops = {
+	.port_disable		= ata_port_disable,
+
+	.tf_load		= ata_tf_load,
+	.tf_read		= ata_tf_read,
+	.check_status		= ata_check_status,
+	.exec_command		= ata_exec_command,
+	.dev_select		= ata_std_dev_select,
+
+	.bmdma_setup            = ata_bmdma_setup,
+	.bmdma_start            = ata_bmdma_start,
+	.bmdma_stop		= ata_bmdma_stop,
+	.bmdma_status		= ata_bmdma_status,
+
+	.qc_prep		= ata_qc_prep,
+	.qc_issue		= ata_qc_issue_prot,
+	.data_xfer		= ata_pio_data_xfer,
+
+	.freeze			= ata_bmdma_freeze,
+	.thaw			= ata_bmdma_thaw,
+	.error_handler		= vt6420_error_handler,
+	.post_internal_cmd	= ata_bmdma_post_internal_cmd,
+
+	.irq_handler		= ata_interrupt,
+	.irq_clear		= ata_bmdma_irq_clear,
+
+	.port_start		= ata_port_start,
+	.port_stop		= ata_port_stop,
+	.host_stop		= ata_host_stop,
+};
+
+static const struct ata_port_operations vt6421_sata_ops = {
 	.port_disable		= ata_port_disable,
 
 	.tf_load		= ata_tf_load,
@@ -141,13 +173,13 @@ static const struct ata_port_operations svia_sata_ops = {
 	.host_stop		= ata_host_stop,
 };
 
-static struct ata_port_info svia_port_info = {
+static struct ata_port_info vt6420_port_info = {
 	.sht		= &svia_sht,
 	.host_flags	= ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY,
 	.pio_mask	= 0x1f,
 	.mwdma_mask	= 0x07,
 	.udma_mask	= 0x7f,
-	.port_ops	= &svia_sata_ops,
+	.port_ops	= &vt6420_sata_ops,
 };
 
 MODULE_AUTHOR("Jeff Garzik");
@@ -170,6 +202,81 @@ static void svia_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val)
 	outl(val, ap->ioaddr.scr_addr + (4 * sc_reg));
 }
 
+/**
+ *	vt6420_prereset - prereset for vt6420
+ *	@ap: target ATA port
+ *
+ *	SCR registers on vt6420 are pieces of shit and may hang the
+ *	whole machine completely if accessed with the wrong timing.
+ *	To avoid such catastrophe, vt6420 doesn't provide generic SCR
+ *	access operations, but uses SStatus and SControl only during
+ *	boot probing in controlled way.
+ *
+ *	As the old (pre EH update) probing code is proven to work, we
+ *	strictly follow the access pattern.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep)
+ *
+ *	RETURNS:
+ *	0 on success, -errno otherwise.
+ */
+static int vt6420_prereset(struct ata_port *ap)
+{
+	struct ata_eh_context *ehc = &ap->eh_context;
+	unsigned long timeout = jiffies + (HZ * 5);
+	u32 sstatus, scontrol;
+	int online;
+
+	/* test the port's flag, not the constant: skip SCR unless loading */
+	if (!(ap->pflags & ATA_PFLAG_LOADING))
+		goto skip_scr;
+
+	/* Resume phy.  This is the old resume sequence from
+	 * __sata_phy_reset().
+	 */
+	svia_scr_write(ap, SCR_CONTROL, 0x300);
+	svia_scr_read(ap, SCR_CONTROL); /* flush */
+
+	/* wait for phy to become ready, if necessary */
+	do {
+		msleep(200);
+		if ((svia_scr_read(ap, SCR_STATUS) & 0xf) != 1)
+			break;
+	} while (time_before(jiffies, timeout));
+
+	/* open code sata_print_link_status() */
+	sstatus = svia_scr_read(ap, SCR_STATUS);
+	scontrol = svia_scr_read(ap, SCR_CONTROL);
+
+	online = (sstatus & 0xf) == 0x3;
+
+	ata_port_printk(ap, KERN_INFO,
+			"SATA link %s 1.5 Gbps (SStatus %X SControl %X)\n",
+			online ? "up" : "down", sstatus, scontrol);
+
+	/* SStatus is read one more time */
+	svia_scr_read(ap, SCR_STATUS);
+
+	if (!online) {
+		/* tell EH to bail */
+		ehc->i.action &= ~ATA_EH_RESET_MASK;
+		return 0;
+	}
+
+ skip_scr:
+	/* wait for !BSY */
+	ata_busy_sleep(ap, ATA_TMOUT_BOOT_QUICK, ATA_TMOUT_BOOT);
+
+	return 0;
+}
+
+static void vt6420_error_handler(struct ata_port *ap)
+{
+	return ata_bmdma_drive_eh(ap, vt6420_prereset, ata_std_softreset,
+				  NULL, ata_std_postreset);
+}
+
 static const unsigned int svia_bar_sizes[] = {
 	8, 4, 8, 4, 16, 256
 };
@@ -210,7 +317,7 @@ static void vt6421_init_addrs(struct ata_probe_ent *probe_ent,
 static struct ata_probe_ent *vt6420_init_probe_ent(struct pci_dev *pdev)
 {
 	struct ata_probe_ent *probe_ent;
-	struct ata_port_info *ppi = &svia_port_info;
+	struct ata_port_info *ppi = &vt6420_port_info;
 
 	probe_ent = ata_pci_init_native_mode(pdev, &ppi, ATA_PORT_PRIMARY | ATA_PORT_SECONDARY);
 	if (!probe_ent)
@@ -239,7 +346,7 @@ static struct ata_probe_ent *vt6421_init_probe_ent(struct pci_dev *pdev)
 
 	probe_ent->sht		= &svia_sht;
 	probe_ent->host_flags	= ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY;
-	probe_ent->port_ops	= &svia_sata_ops;
+	probe_ent->port_ops	= &vt6421_sata_ops;
 	probe_ent->n_ports	= N_PORTS;
 	probe_ent->irq		= pdev->irq;
 	probe_ent->irq_flags	= IRQF_SHARED;
-- 
cgit v0.10.2


From 8e79a441a4d8a34d64efe93add49b3eefca5cd1c Mon Sep 17 00:00:00 2001
From: Horst Hummel <horst.hummel@de.ibm.com>
Date: Thu, 24 Aug 2006 13:22:36 +0200
Subject: [S390] dasd PAV enabling.

The subsystem check in the PAV code is incorrect, it enables PAV
per device instead of per subsystem.

Signed-off-by: Horst Hummel <horst.hummel@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 9d0c6e1..9af02c7 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -54,11 +54,11 @@ struct dasd_devmap {
  */
 struct dasd_server_ssid_map {
 	struct list_head list;
-	struct server_id {
+	struct system_id {
 		char vendor[4];
 		char serial[15];
+		__u16 ssid;
 	} sid;
-	__u16 ssid;
 };
 
 static struct list_head dasd_server_ssid_list;
@@ -904,14 +904,14 @@ dasd_set_uid(struct ccw_device *cdev, struct dasd_uid *uid)
 		return -ENOMEM;
 	strncpy(srv->sid.vendor, uid->vendor, sizeof(srv->sid.vendor) - 1);
 	strncpy(srv->sid.serial, uid->serial, sizeof(srv->sid.serial) - 1);
-	srv->ssid = uid->ssid;
+	srv->sid.ssid = uid->ssid;
 
 	/* server is already contained ? */
 	spin_lock(&dasd_devmap_lock);
 	devmap->uid = *uid;
 	list_for_each_entry(tmp, &dasd_server_ssid_list, list) {
 		if (!memcmp(&srv->sid, &tmp->sid,
-			    sizeof(struct dasd_server_ssid_map))) {
+			    sizeof(struct system_id))) {
 			kfree(srv);
 			srv = NULL;
 			break;
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 957ed5d..b7a7fac 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -607,7 +607,7 @@ dasd_eckd_psf_ssc(struct dasd_device *device)
  * Valide storage server of current device.
  */
 static int
-dasd_eckd_validate_server(struct dasd_device *device)
+dasd_eckd_validate_server(struct dasd_device *device, struct dasd_uid *uid)
 {
 	int rc;
 
@@ -616,11 +616,11 @@ dasd_eckd_validate_server(struct dasd_device *device)
 		return 0;
 
 	rc = dasd_eckd_psf_ssc(device);
-	if (rc)
-		/* may be requested feature is not available on server,
-		 * therefore just report error and go ahead */
-		DEV_MESSAGE(KERN_INFO, device,
-			    "Perform Subsystem Function returned rc=%d", rc);
+	/* may be requested feature is not available on server,
+	 * therefore just report error and go ahead */
+	DEV_MESSAGE(KERN_INFO, device,
+		    "PSF-SSC on storage subsystem %s.%s.%04x returned rc=%d",
+		    uid->vendor, uid->serial, uid->ssid, rc);
 	/* RE-Read Configuration Data */
 	return dasd_eckd_read_conf(device);
 }
@@ -666,7 +666,7 @@ dasd_eckd_check_characteristics(struct dasd_device *device)
 		return rc;
 	rc = dasd_set_uid(device->cdev, &uid);
 	if (rc == 1)	/* new server found */
-		rc = dasd_eckd_validate_server(device);
+		rc = dasd_eckd_validate_server(device, &uid);
 	if (rc)
 		return rc;
 
-- 
cgit v0.10.2


From ddeff520f02b92128132c282c350fa72afffb84a Mon Sep 17 00:00:00 2001
From: Nikita Danilov <nikita@clusterfs.com>
Date: Wed, 9 Aug 2006 13:53:47 -0400
Subject: NFS: Fix a potential deadlock in nfs_release_page

nfs_wb_page() waits on request completion and, as a result, is not safe to be
called from nfs_release_page() invoked by VM scanner as part of GFP_NOFS
allocation. Fix possible deadlock by analyzing gfp mask and refusing to
release page if __GFP_FS is not set.

Signed-off-by: Nikita Danilov <danilov@gmail.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 374d969debfb290bafcb41d28918dc6f7e43ce31 commit)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index cc2b874..48e8928 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -312,7 +312,13 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
 
 static int nfs_release_page(struct page *page, gfp_t gfp)
 {
-	return !nfs_wb_page(page->mapping->host, page);
+	if (gfp & __GFP_FS)
+		return !nfs_wb_page(page->mapping->host, page);
+	else
+		/*
+		 * Avoid deadlock on nfs_wait_on_request().
+		 */
+		return 0;
 }
 
 const struct address_space_operations nfs_file_aops = {
-- 
cgit v0.10.2


From a634904a7de0d3a0bc606f608007a34e8c05bfee Mon Sep 17 00:00:00 2001
From: ASANO Masahiro <masano@tnes.nec.co.jp>
Date: Tue, 22 Aug 2006 20:06:02 -0400
Subject: VFS: add lookup hint for network file systems

I'm trying to speed up mkdir(2) for network file systems.  A typical
mkdir(2) calls two inode_operations: lookup and mkdir.  The lookup
operation would fail with ENOENT in the common case.  I think it is
unnecessary because the subsequent mkdir operation can check it.  In the
case of creat(2), the lookup operation is called with the LOOKUP_CREATE
flag, so an individual filesystem can omit the real lookup, e.g. nfs_lookup().

Here is a sample patch which uses LOOKUP_CREATE and O_EXCL on mkdir,
symlink and mknod.  This uses the gadget for creat(2).

And here is the result of a benchmark on NFSv3.
  mkdir(2) 10,000 times:
    original  50.5 sec
    patched   29.0 sec

Signed-off-by: ASANO Masahiro <masano@tnes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from fab7bf44449b29f9d5572a5dd8adcf7c91d5bf0f commit)

diff --git a/fs/namei.c b/fs/namei.c
index 55a1312..8631664 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1767,6 +1767,8 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 	if (nd->last_type != LAST_NORM)
 		goto fail;
 	nd->flags &= ~LOOKUP_PARENT;
+	nd->flags |= LOOKUP_CREATE;
+	nd->intent.open.flags = O_EXCL;
 
 	/*
 	 * Do the final lookup.
-- 
cgit v0.10.2


From 5d67476fff2df6ff12f60b540fd0e74cf2a668f9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 31 Jul 2006 14:11:48 -0700
Subject: SUNRPC: make rpc_unlink() take a dentry argument instead of a path

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 88bf6d811b01a4be7fd507d18bf5f1c527989089 commit)

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index b81e7ed..df0be12 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -130,9 +130,8 @@ nfs_idmap_delete(struct nfs4_client *clp)
 
 	if (!idmap)
 		return;
+	rpc_unlink(idmap->idmap_dentry);
 	dput(idmap->idmap_dentry);
-	idmap->idmap_dentry = NULL;
-	rpc_unlink(idmap->idmap_path);
 	clp->cl_idmap = NULL;
 	kfree(idmap);
 }
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index 2c2189c..04d2767 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -44,7 +44,7 @@ extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *);
 extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *);
 extern int rpc_rmdir(char *);
 extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags);
-extern int rpc_unlink(char *);
+extern int rpc_unlink(struct dentry *);
 extern struct vfsmount *rpc_get_mount(void);
 extern void rpc_put_mount(void);
 
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 4a9aa93..beaa7b8 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -718,7 +718,7 @@ gss_destroy(struct rpc_auth *auth)
 		auth, auth->au_flavor);
 
 	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
-	rpc_unlink(gss_auth->path);
+	rpc_unlink(gss_auth->dentry);
 	dput(gss_auth->dentry);
 	gss_auth->dentry = NULL;
 	gss_mech_put(gss_auth->mech);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index a3bd2db..9144f27 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -746,22 +746,15 @@ err_dput:
 }
 
 int
-rpc_unlink(char *path)
+rpc_unlink(struct dentry *dentry)
 {
-	struct nameidata nd;
-	struct dentry *dentry;
+	struct dentry *parent;
 	struct inode *dir;
-	int error;
+	int error = 0;
 
-	if ((error = rpc_lookup_parent(path, &nd)) != 0)
-		return error;
-	dir = nd.dentry->d_inode;
+	parent = dget_parent(dentry);
+	dir = parent->d_inode;
 	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
-	dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len);
-	if (IS_ERR(dentry)) {
-		error = PTR_ERR(dentry);
-		goto out_release;
-	}
 	d_drop(dentry);
 	if (dentry->d_inode) {
 		rpc_close_pipes(dentry->d_inode);
@@ -769,9 +762,8 @@ rpc_unlink(char *path)
 	}
 	dput(dentry);
 	inode_dir_notify(dir, DN_DELETE);
-out_release:
 	mutex_unlock(&dir->i_mutex);
-	rpc_release_path(&nd);
+	dput(parent);
 	return error;
 }
 
-- 
cgit v0.10.2


From dff02cc1a34fcb60904a2c57cb351857cc11219e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 31 Jul 2006 14:17:18 -0700
Subject: NFS: clean up rpc_rmdir

Make it take a dentry argument instead of a path

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 648d4116eb2509f010f7f34704a650150309b3e7 commit)

diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index 04d2767..a481472 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -42,7 +42,7 @@ RPC_I(struct inode *inode)
 extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *);
 
 extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *);
-extern int rpc_rmdir(char *);
+extern int rpc_rmdir(struct dentry *);
 extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags);
 extern int rpc_unlink(struct dentry *);
 extern struct vfsmount *rpc_get_mount(void);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d6409e7..d307556 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -183,7 +183,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
 
 out_no_auth:
 	if (!IS_ERR(clnt->cl_dentry)) {
-		rpc_rmdir(clnt->cl_pathname);
+		rpc_rmdir(clnt->cl_dentry);
 		dput(clnt->cl_dentry);
 		rpc_put_mount();
 	}
@@ -320,8 +320,8 @@ rpc_destroy_client(struct rpc_clnt *clnt)
 		rpc_destroy_client(clnt->cl_parent);
 		goto out_free;
 	}
-	if (clnt->cl_pathname[0])
-		rpc_rmdir(clnt->cl_pathname);
+	if (!IS_ERR(clnt->cl_dentry))
+		rpc_rmdir(clnt->cl_dentry);
 	if (clnt->cl_xprt) {
 		xprt_destroy(clnt->cl_xprt);
 		clnt->cl_xprt = NULL;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 9144f27..9c355e1 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -684,28 +684,20 @@ err_dput:
 }
 
 int
-rpc_rmdir(char *path)
+rpc_rmdir(struct dentry *dentry)
 {
-	struct nameidata nd;
-	struct dentry *dentry;
+	struct dentry *parent;
 	struct inode *dir;
 	int error;
 
-	if ((error = rpc_lookup_parent(path, &nd)) != 0)
-		return error;
-	dir = nd.dentry->d_inode;
+	parent = dget_parent(dentry);
+	dir = parent->d_inode;
 	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
-	dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len);
-	if (IS_ERR(dentry)) {
-		error = PTR_ERR(dentry);
-		goto out_release;
-	}
 	rpc_depopulate(dentry);
 	error = __rpc_rmdir(dir, dentry);
 	dput(dentry);
-out_release:
 	mutex_unlock(&dir->i_mutex);
-	rpc_release_path(&nd);
+	dput(parent);
 	return error;
 }
 
-- 
cgit v0.10.2


From 68adb0af51ebccb72ffb14d49cb8121b1afc4259 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 10 Aug 2006 17:51:46 -0400
Subject: SUNRPC: rpc_unlink() must check for unhashed dentries

A prior call to rpc_depopulate() by rpc_rmdir() on the parent directory may
have already called simple_unlink() on this entry.
Add the same check to rpc_rmdir(). Also remove a redundant call to
rpc_close_pipes() in rpc_rmdir.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 0bbfb9d20f6437c4031aa3bf9b4d311a053e58e3 commit)

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 9c355e1..0b1a1ac 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -539,6 +539,7 @@ repeat:
 				rpc_close_pipes(dentry->d_inode);
 				simple_unlink(dir, dentry);
 			}
+			inode_dir_notify(dir, DN_DELETE);
 			dput(dentry);
 		} while (n);
 		goto repeat;
@@ -610,8 +611,8 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry)
 	int error;
 
 	shrink_dcache_parent(dentry);
-	if (dentry->d_inode)
-		rpc_close_pipes(dentry->d_inode);
+	if (d_unhashed(dentry))
+		return 0;
 	if ((error = simple_rmdir(dir, dentry)) != 0)
 		return error;
 	if (!error) {
@@ -747,13 +748,15 @@ rpc_unlink(struct dentry *dentry)
 	parent = dget_parent(dentry);
 	dir = parent->d_inode;
 	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
-	d_drop(dentry);
-	if (dentry->d_inode) {
-		rpc_close_pipes(dentry->d_inode);
-		error = simple_unlink(dir, dentry);
+	if (!d_unhashed(dentry)) {
+		d_drop(dentry);
+		if (dentry->d_inode) {
+			rpc_close_pipes(dentry->d_inode);
+			error = simple_unlink(dir, dentry);
+		}
+		inode_dir_notify(dir, DN_DELETE);
 	}
 	dput(dentry);
-	inode_dir_notify(dir, DN_DELETE);
 	mutex_unlock(&dir->i_mutex);
 	dput(parent);
 	return error;
-- 
cgit v0.10.2


From 8f8e7a50f450fcb86a5b2ffb94543c57a14f8260 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 14 Aug 2006 13:11:15 -0400
Subject: SUNRPC: Fix dentry refcounting issues with users of rpc_pipefs

rpc_unlink() and rpc_rmdir() will dput the dentry reference for you.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from a05a57effa71a1f67ccbfc52335c10c8b85f3f6a commit)

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index df0be12..07a5dd5 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -131,7 +131,6 @@ nfs_idmap_delete(struct nfs4_client *clp)
 	if (!idmap)
 		return;
 	rpc_unlink(idmap->idmap_dentry);
-	dput(idmap->idmap_dentry);
 	clp->cl_idmap = NULL;
 	kfree(idmap);
 }
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index beaa7b8..ef1cf5b 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -719,7 +719,6 @@ gss_destroy(struct rpc_auth *auth)
 
 	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
 	rpc_unlink(gss_auth->dentry);
-	dput(gss_auth->dentry);
 	gss_auth->dentry = NULL;
 	gss_mech_put(gss_auth->mech);
 
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d307556..d9eac70 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -184,7 +184,6 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
 out_no_auth:
 	if (!IS_ERR(clnt->cl_dentry)) {
 		rpc_rmdir(clnt->cl_dentry);
-		dput(clnt->cl_dentry);
 		rpc_put_mount();
 	}
 out_no_path:
@@ -251,10 +250,8 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	new->cl_autobind = 0;
 	new->cl_oneshot = 0;
 	new->cl_dead = 0;
-	if (!IS_ERR(new->cl_dentry)) {
+	if (!IS_ERR(new->cl_dentry))
 		dget(new->cl_dentry);
-		rpc_get_mount();
-	}
 	rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
 	if (new->cl_auth)
 		atomic_inc(&new->cl_auth->au_count);
@@ -317,11 +314,15 @@ rpc_destroy_client(struct rpc_clnt *clnt)
 		clnt->cl_auth = NULL;
 	}
 	if (clnt->cl_parent != clnt) {
+		if (!IS_ERR(clnt->cl_dentry))
+			dput(clnt->cl_dentry);
 		rpc_destroy_client(clnt->cl_parent);
 		goto out_free;
 	}
-	if (!IS_ERR(clnt->cl_dentry))
+	if (!IS_ERR(clnt->cl_dentry)) {
 		rpc_rmdir(clnt->cl_dentry);
+		rpc_put_mount();
+	}
 	if (clnt->cl_xprt) {
 		xprt_destroy(clnt->cl_xprt);
 		clnt->cl_xprt = NULL;
@@ -331,10 +332,6 @@ rpc_destroy_client(struct rpc_clnt *clnt)
 out_free:
 	rpc_free_iostats(clnt->cl_metrics);
 	clnt->cl_metrics = NULL;
-	if (!IS_ERR(clnt->cl_dentry)) {
-		dput(clnt->cl_dentry);
-		rpc_put_mount();
-	}
 	kfree(clnt);
 	return 0;
 }
-- 
cgit v0.10.2


From 01df9c5e918ae5559f2d96da0143f8bfbb9e6171 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 10 Aug 2006 11:58:57 -0400
Subject: LOCKD: Fix a deadlock in nlm_traverse_files()

nlm_traverse_files() is not allowed to hold the nlm_file_mutex while calling
nlm_inspect_file(), since it may end up calling nlm_release_file() when
releasing the blocks.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from e558d3cde986e04f68afe8c790ad68ef4b94587a commit)

diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 2a4df9b..01b4db9 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -237,19 +237,22 @@ static int
 nlm_traverse_files(struct nlm_host *host, int action)
 {
 	struct nlm_file	*file, **fp;
-	int		i;
+	int i, ret = 0;
 
 	mutex_lock(&nlm_file_mutex);
 	for (i = 0; i < FILE_NRHASH; i++) {
 		fp = nlm_files + i;
 		while ((file = *fp) != NULL) {
+			file->f_count++;
+			mutex_unlock(&nlm_file_mutex);
+
 			/* Traverse locks, blocks and shares of this file
 			 * and update file->f_locks count */
-			if (nlm_inspect_file(host, file, action)) {
-				mutex_unlock(&nlm_file_mutex);
-				return 1;
-			}
+			if (nlm_inspect_file(host, file, action))
+				ret = 1;
 
+			mutex_lock(&nlm_file_mutex);
+			file->f_count--;
 			/* No more references to this file. Let go of it. */
 			if (!file->f_blocks && !file->f_locks
 			 && !file->f_shares && !file->f_count) {
@@ -262,7 +265,7 @@ nlm_traverse_files(struct nlm_host *host, int action)
 		}
 	}
 	mutex_unlock(&nlm_file_mutex);
-	return 0;
+	return ret;
 }
 
 /*
-- 
cgit v0.10.2


From 79558f3610efd7928e8882b2eaca3093b283630e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Aug 2006 13:44:32 -0400
Subject: NFS: Fix issue with EIO on NFS read

The problem is that we may be caching writes that would extend the file and
create a hole in the region that we are reading. In this case, we need to
detect the eof from the server, ensure that we zero out the pages that
are part of the hole and mark them as up to date.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 856b603b01b99146918c093969b6cb1b1b0f1c01 commit)

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 65c0c5b..da9cf11 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -116,10 +116,17 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
 	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
 	base &= ~PAGE_CACHE_MASK;
 	pglen = PAGE_CACHE_SIZE - base;
-	if (pglen < remainder)
+	for (;;) {
+		if (remainder <= pglen) {
+			memclear_highpage_flush(*pages, base, remainder);
+			break;
+		}
 		memclear_highpage_flush(*pages, base, pglen);
-	else
-		memclear_highpage_flush(*pages, base, remainder);
+		pages++;
+		remainder -= pglen;
+		pglen = PAGE_CACHE_SIZE;
+		base = 0;
+	}
 }
 
 /*
@@ -476,6 +483,8 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
 	unsigned int base = data->args.pgbase;
 	struct page **pages;
 
+	if (data->res.eof)
+		count = data->args.count;
 	if (unlikely(count == 0))
 		return;
 	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
@@ -483,11 +492,7 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
 	count += base;
 	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
 		SetPageUptodate(*pages);
-	/*
-	 * Was this an eof or a short read? If the latter, don't mark the page
-	 * as uptodate yet.
-	 */
-	if (count > 0 && (data->res.eof || data->args.count == data->res.count))
+	if (count != 0)
 		SetPageUptodate(*pages);
 }
 
@@ -502,6 +507,8 @@ static void nfs_readpage_set_pages_error(struct nfs_read_data *data)
 	count += base;
 	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
 		SetPageError(*pages);
+	if (count != 0)
+		SetPageError(*pages);
 }
 
 /*
-- 
cgit v0.10.2


From 8e037094c414172481c5ce903efdab50ce932343 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:15 -0400
Subject: SUNRPC: avoid choosing an IPMI port for RPC traffic

Some hardware uses port 664 for its hardware-based IPMI listener.  Teach
the RPC client to avoid using that port by raising the default minimum port
number to 665.

Test plan:
Find a mainboard known to use port 664 for IPMI; enable IPMI; mount NFS
servers in a tight loop.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 58e8cb3a035d22fc386e1c53a5d98c3f219530fb commit)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 840e47a..3a0cca2 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -37,7 +37,7 @@ extern unsigned int xprt_max_resvport;
 
 #define RPC_MIN_RESVPORT	(1U)
 #define RPC_MAX_RESVPORT	(65535U)
-#define RPC_DEF_MIN_RESVPORT	(650U)
+#define RPC_DEF_MIN_RESVPORT	(665U)
 #define RPC_DEF_MAX_RESVPORT	(1023U)
 
 /*
-- 
cgit v0.10.2


From 3cedf13af9f7e61aca0dbbd11b601ac93bf93a9f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Thu, 24 Aug 2006 15:44:12 -0400
Subject: NFSv4: increase client-provided nfs4 clientid size

Neil Brown observed that the current limit of 32 bytes isn't enough to hold two
ip addresses and the rest of the stuff we're putting in it, so it's often
truncated to the point where it's unlikely to be unique.  This can cause
spurious CLID_INUSE's from the server.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from fc8c17ec251e984ab3df9182ed097aa5b577c915 commit)

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2d3fb64..db9cbf6 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -659,7 +659,7 @@ struct nfs4_rename_res {
 struct nfs4_setclientid {
 	const nfs4_verifier *		sc_verifier;      /* request */
 	unsigned int			sc_name_len;
-	char				sc_name[32];	  /* request */
+	char				sc_name[48];	  /* request */
 	u32				sc_prog;          /* request */
 	unsigned int			sc_netid_len;
 	char				sc_netid[4];	  /* request */
-- 
cgit v0.10.2


From e8896495bca8490a427409e0886d63d05419ec65 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 24 Aug 2006 15:44:19 -0400
Subject: NFS: Check lengths more thoroughly in NFS4 readdir XDR decode

Check the bounds of length specifiers more thoroughly in the XDR decoding of
NFS4 readdir reply data.

Currently, if the server returns a bitmap or attr length that causes the
current decode point pointer to wrap, this could go undetected (consider a
small "negative" length on a 32-bit machine).

Also add a check into the main XDR decode handler to make sure that the amount
of data is a multiple of four bytes (as specified by RFC-1014).  This makes
sure that we can do u32* pointer subtraction in the NFS client without risking
an undefined result (the result is undefined if the pointers are not correctly
aligned with respect to one another).

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 5861fddd64a7eaf7e8b1a9997455a24e7f688092 commit)

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1750d99..730ec8f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3355,7 +3355,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 	struct kvec	*iov = rcvbuf->head;
 	unsigned int	nr, pglen = rcvbuf->page_len;
 	uint32_t	*end, *entry, *p, *kaddr;
-	uint32_t	len, attrlen;
+	uint32_t	len, attrlen, xlen;
 	int 		hdrlen, recvd, status;
 
 	status = decode_op_hdr(xdr, OP_READDIR);
@@ -3377,10 +3377,10 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 
 	BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE);
 	kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0);
-	end = (uint32_t *) ((char *)p + pglen + readdir->pgbase);
+	end = p + ((pglen + readdir->pgbase) >> 2);
 	entry = p;
 	for (nr = 0; *p++; nr++) {
-		if (p + 3 > end)
+		if (end - p < 3)
 			goto short_pkt;
 		dprintk("cookie = %Lu, ", *((unsigned long long *)p));
 		p += 2;			/* cookie */
@@ -3389,18 +3389,19 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 			printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
 			goto err_unmap;
 		}
-		dprintk("filename = %*s\n", len, (char *)p);
-		p += XDR_QUADLEN(len);
-		if (p + 1 > end)
+		xlen = XDR_QUADLEN(len);
+		if (end - p < xlen + 1)
 			goto short_pkt;
+		dprintk("filename = %*s\n", len, (char *)p);
+		p += xlen;
 		len = ntohl(*p++);	/* bitmap length */
-		p += len;
-		if (p + 1 > end)
+		if (end - p < len + 1)
 			goto short_pkt;
+		p += len;
 		attrlen = XDR_QUADLEN(ntohl(*p++));
-		p += attrlen;		/* attributes */
-		if (p + 2 > end)
+		if (end - p < attrlen + 2)
 			goto short_pkt;
+		p += attrlen;		/* attributes */
 		entry = p;
 	}
 	if (!nr && (entry[0] != 0 || entry[1] == 0))
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d9eac70..3e19d32 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1181,6 +1181,17 @@ call_verify(struct rpc_task *task)
 	u32	*p = iov->iov_base, n;
 	int error = -EACCES;
 
+	if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) {
+		/* RFC-1014 says that the representation of XDR data must be a
+		 * multiple of four bytes
+		 * - if it isn't pointer subtraction in the NFS client may give
+		 *   undefined results
+		 */
+		printk(KERN_WARNING
+		       "call_verify: XDR representation not a multiple of"
+		       " 4 bytes: 0x%x\n", task->tk_rqstp->rq_rcv_buf.len);
+		goto out_eio;
+	}
 	if ((len -= 3) < 0)
 		goto out_overflow;
 	p += 1;	/* skip XID */
-- 
cgit v0.10.2


From 16b4289c7460ba9c04af40c574949dcca9029658 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 24 Aug 2006 12:27:15 -0400
Subject: NFSv4: Add v4 exception handling for the ACL functions.

This is needed in order to handle any NFS4ERR_DELAY errors that might be
returned by the server. It also ensures that we map the NFSv4 errors before
they are returned to userland.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 71c12b3f0abc7501f6ed231a6d17bc9c05a238dc commit)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e6ee97f..153898e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2668,7 +2668,7 @@ out:
 	nfs4_set_cached_acl(inode, acl);
 }
 
-static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
 {
 	struct page *pages[NFS4ACL_MAXPAGES];
 	struct nfs_getaclargs args = {
@@ -2721,6 +2721,19 @@ out_free:
 	return ret;
 }
 
+static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+{
+	struct nfs4_exception exception = { };
+	ssize_t ret;
+	do {
+		ret = __nfs4_get_acl_uncached(inode, buf, buflen);
+		if (ret >= 0)
+			break;
+		ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception);
+	} while (exception.retry);
+	return ret;
+}
+
 static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
@@ -2737,7 +2750,7 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
 	return nfs4_get_acl_uncached(inode, buf, buflen);
 }
 
-static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
+static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct page *pages[NFS4ACL_MAXPAGES];
@@ -2763,6 +2776,18 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 	return ret;
 }
 
+static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(inode),
+				__nfs4_proc_set_acl(inode, buf, buflen),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
 static int
 nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 {
-- 
cgit v0.10.2


From a343bb7750e6a098909c34f5c5dfddbc4fa40053 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Aug 2006 20:06:03 -0400
Subject: VFS: Fix access("file", X_OK) in the presence of ACLs

Currently, the access() call will return incorrect information on NFS if
there exists an ACL that grants execute access to the user on a regular
file. The reason the information is incorrect is that the VFS overrides
this execute access in open_exec() by checking (inode->i_mode & 0111).

This patch propagates the VFS execute bit check back into the generic
permission() call.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 64cbae98848c4c99851cb0a405f0b4982cd76c1e commit)

diff --git a/fs/namei.c b/fs/namei.c
index 8631664..432d6bc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -227,10 +227,10 @@ int generic_permission(struct inode *inode, int mask,
 
 int permission(struct inode *inode, int mask, struct nameidata *nd)
 {
+	umode_t mode = inode->i_mode;
 	int retval, submask;
 
 	if (mask & MAY_WRITE) {
-		umode_t mode = inode->i_mode;
 
 		/*
 		 * Nobody gets write access to a read-only fs.
@@ -247,6 +247,13 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 	}
 
 
+	/*
+	 * MAY_EXEC on regular files requires special handling: We override
+	 * filesystem execute permissions if the mode bits aren't set.
+	 */
+	if ((mask & MAY_EXEC) && S_ISREG(mode) && !(mode & S_IXUGO))
+		return -EACCES;
+
 	/* Ordinary permission routines do not understand MAY_APPEND. */
 	submask = mask & ~MAY_APPEND;
 	if (inode->i_op && inode->i_op->permission)
-- 
cgit v0.10.2


From 9167b0b9a0ab7907191523f5a0528e3b9c288e21 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Aug 2006 20:06:03 -0400
Subject: VFS: Remove redundant open-coded mode bit check in prepare_binfmt().

The check in prepare_binprm() for inode->i_mode & 0111 is redundant,
since open_exec() will already have done that.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 822dec482ced07af32c378cd936d77345786572b commit)

diff --git a/fs/exec.c b/fs/exec.c
index 8344ba7..a6f64a9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -922,12 +922,6 @@ int prepare_binprm(struct linux_binprm *bprm)
 	int retval;
 
 	mode = inode->i_mode;
-	/*
-	 * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
-	 * generic_permission lets a non-executable through
-	 */
-	if (!(mode & 0111))	/* with at least _one_ execute bit set */
-		return -EACCES;
 	if (bprm->file->f_op == NULL)
 		return -EACCES;
 
-- 
cgit v0.10.2


From a969fd5a4e162c4485ae8f3e49d674656a18fa36 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Aug 2006 20:06:04 -0400
Subject: VFS: Remove redundant open-coded mode bit checks in open_exec().

The check in open_exec() for inode->i_mode & 0111 has been made
redundant by the fix to permission().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 1d3741c5d991686699f100b65b9956f7ee7ae0ae commit)

diff --git a/fs/exec.c b/fs/exec.c
index a6f64a9..f7aabfe 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -486,8 +486,6 @@ struct file *open_exec(const char *name)
 		if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
 		    S_ISREG(inode->i_mode)) {
 			int err = vfs_permission(&nd, MAY_EXEC);
-			if (!err && !(inode->i_mode & 0111))
-				err = -EACCES;
 			file = ERR_PTR(err);
 			if (!err) {
 				file = nameidata_to_filp(&nd, O_RDONLY);
-- 
cgit v0.10.2


From b2155d0417df9f2b4c0d396b6530b864d9f08623 Mon Sep 17 00:00:00 2001
From: Douglas Gilbert <dougg@torque.net>
Date: Sat, 19 Aug 2006 00:11:34 -0400
Subject: [SCSI] sg: fix incorrect page problem

There's a problem where sg is executing a ->nopage operation on a
compound page, it actually calls get_page() on the first page in the
compound rather than the page which is being mapped.  The fix is to
select the correct page by indexing into the compound.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 65eef338..34f9343e 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -18,8 +18,8 @@
  *
  */
 
-static int sg_version_num = 30533;	/* 2 digits for each component */
-#define SG_VERSION_STR "3.5.33"
+static int sg_version_num = 30534;	/* 2 digits for each component */
+#define SG_VERSION_STR "3.5.34"
 
 /*
  *  D. P. Gilbert (dgilbert@interlog.com, dougg@triode.net.au), notes:
@@ -60,7 +60,7 @@ static int sg_version_num = 30533;	/* 2 digits for each component */
 
 #ifdef CONFIG_SCSI_PROC_FS
 #include <linux/proc_fs.h>
-static char *sg_version_date = "20050908";
+static char *sg_version_date = "20060818";
 
 static int sg_proc_init(void);
 static void sg_proc_cleanup(void);
@@ -1164,7 +1164,7 @@ sg_vma_nopage(struct vm_area_struct *vma, unsigned long addr, int *type)
 		len = vma->vm_end - sa;
 		len = (len < sg->length) ? len : sg->length;
 		if (offset < len) {
-			page = sg->page;
+			page = virt_to_page(page_address(sg->page) + offset);
 			get_page(page);	/* increment page count */
 			break;
 		}
-- 
cgit v0.10.2


From 9c06938aa458843fb71fa35371f23a3b89317252 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 23 Aug 2006 14:54:54 -0700
Subject: [SCSI] qla2xxx: Correct PLOGI retry logic.

Original code attempts to retry PLOGIs to fcports that are
FCP_TARGETs only.  If the driver never performed a successful
PLOGI/PRLI, the port-type would never be assigned, and the
relogin logic would silently drop the request (and thus the port
would not be recognized and registered).

The fix is relatively straightforward, drop the FCP_TARGET-only
check.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 1e2b95b..65cbe2f 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -2238,9 +2238,6 @@ qla2x00_do_dpc(void *data)
 
 			next_loopid = 0;
 			list_for_each_entry(fcport, &ha->fcports, list) {
-				if (fcport->port_type != FCT_TARGET)
-					continue;
-
 				/*
 				 * If the port is not ONLINE then try to login
 				 * to it if we haven't run out of retries.
-- 
cgit v0.10.2


From 476e8978d9ccacfc911bd42e083dd784ad1465b1 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 23 Aug 2006 14:54:55 -0700
Subject: [SCSI] qla2xxx: Properly re-enable EFT support after an ISP abort.

Software must explicitly re-enable extended firmware tracing
after any ISP abort condition.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 9758dba..8596491 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -3063,6 +3063,7 @@ qla2x00_update_fcports(scsi_qla_host_t *ha)
 int
 qla2x00_abort_isp(scsi_qla_host_t *ha)
 {
+	int rval;
 	unsigned long flags = 0;
 	uint16_t       cnt;
 	srb_t          *sp;
@@ -3119,6 +3120,16 @@ qla2x00_abort_isp(scsi_qla_host_t *ha)
 
 			ha->isp_abort_cnt = 0;
 			clear_bit(ISP_ABORT_RETRY, &ha->dpc_flags);
+
+			if (ha->eft) {
+				rval = qla2x00_trace_control(ha, TC_ENABLE,
+				    ha->eft_dma, EFT_NUM_BUFFERS);
+				if (rval) {
+					qla_printk(KERN_WARNING, ha,
+					    "Unable to reinitialize EFT "
+					    "(%d).\n", rval);
+				}
+			}
 		} else {	/* failed the ISP abort */
 			ha->flags.online = 1;
 			if (test_bit(ISP_ABORT_RETRY, &ha->dpc_flags)) {
-- 
cgit v0.10.2


From 15a3758dc9e1c9d862e46acdf607de70c302fa6a Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Wed, 23 Aug 2006 14:54:56 -0700
Subject: [SCSI] qla2xxx: Update version number to 8.01.07-k1.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index f5826bf..9712590 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -7,9 +7,9 @@
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "8.01.05-k4"
+#define QLA2XXX_VERSION      "8.01.07-k1"
 
 #define QLA_DRIVER_MAJOR_VER	8
 #define QLA_DRIVER_MINOR_VER	1
-#define QLA_DRIVER_PATCH_VER	5
+#define QLA_DRIVER_PATCH_VER	7
 #define QLA_DRIVER_BETA_VER	0
-- 
cgit v0.10.2


From 0db99e3359234be181590463184b9959059a9ea9 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Sat, 26 Aug 2006 03:00:22 -0400
Subject: [SCSI] fix scsi_send_eh_cmnd regression

The callers of scsi_send_eh_cmnd are setting the cmnd buffer,
and then scsi_send_eh_cmnd is copying that updated buffer to
the old_cmnd variable. Then after the command runs, we end up
copying that old_cmnd var which has the new cmnd to the scsi
command buffer. When this command gets resent, all types of fun
things happen like getting TUR or START_STOP commands with
data and scatterlists.

This patch made against scsi-rc-fixes, has the callers of
scsi_send_eh_cmnd pass in the command so scsi_send_eh_cmnd
can do the right thing. This should go into 2.6.18 since this
fixes a regression added when we removed some of the scsi_cmnd
fields and replaced them with local variables.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 6a5b731..a8ed5a2 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -460,7 +460,8 @@ static void scsi_eh_done(struct scsi_cmnd *scmd)
  * Return value:
  *    SUCCESS or FAILED or NEEDS_RETRY
  **/
-static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout, int copy_sense)
+static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
+			     int cmnd_size, int timeout, int copy_sense)
 {
 	struct scsi_device *sdev = scmd->device;
 	struct Scsi_Host *shost = sdev->host;
@@ -490,6 +491,9 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout, int copy_sense
 	old_cmd_len = scmd->cmd_len;
 	old_use_sg = scmd->use_sg;
 
+	memset(scmd->cmnd, 0, sizeof(scmd->cmnd));
+	memcpy(scmd->cmnd, cmnd, cmnd_size);
+
 	if (copy_sense) {
 		int gfp_mask = GFP_ATOMIC;
 
@@ -610,8 +614,7 @@ static int scsi_request_sense(struct scsi_cmnd *scmd)
 	static unsigned char generic_sense[6] =
 		{REQUEST_SENSE, 0, 0, 0, 252, 0};
 
-	memcpy(scmd->cmnd, generic_sense, sizeof(generic_sense));
-	return scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT, 1);
+	return scsi_send_eh_cmnd(scmd, generic_sense, 6, SENSE_TIMEOUT, 1);
 }
 
 /**
@@ -736,10 +739,7 @@ static int scsi_eh_tur(struct scsi_cmnd *scmd)
 	int retry_cnt = 1, rtn;
 
 retry_tur:
-	memcpy(scmd->cmnd, tur_command, sizeof(tur_command));
-
-
-	rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT, 0);
+	rtn = scsi_send_eh_cmnd(scmd, tur_command, 6, SENSE_TIMEOUT, 0);
 
 	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",
 		__FUNCTION__, scmd, rtn));
@@ -839,8 +839,8 @@ static int scsi_eh_try_stu(struct scsi_cmnd *scmd)
 	if (scmd->device->allow_restart) {
 		int rtn;
 
-		memcpy(scmd->cmnd, stu_command, sizeof(stu_command));
-		rtn = scsi_send_eh_cmnd(scmd, START_UNIT_TIMEOUT, 0);
+		rtn = scsi_send_eh_cmnd(scmd, stu_command, 6,
+					START_UNIT_TIMEOUT, 0);
 		if (rtn == SUCCESS)
 			return 0;
 	}
-- 
cgit v0.10.2


From 4801bc25f37a969ea773c24d12fd4738541848a1 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Fri, 11 Aug 2006 22:53:08 +0200
Subject: [PATCH] i2c: tps65010 build fixes

The tps65010.c driver in the main tree never got updated with
build fixes since the last batch of I2C driver changes; and the
genirq trigger flags were updated weirdly too.

This also includes a minor tweak to reduce the frequency used to
poll for unplug-the-AC-power on the TPS chips that don't provide
relevant IRQs.  It _would_ be nice to sense whether there's even
a battery, but that'd normally be an HDQ/1-wire interface to a
smart battery, and such APIs aren't standardized.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c
index e7e2704..0be6fd6 100644
--- a/drivers/i2c/chips/tps65010.c
+++ b/drivers/i2c/chips/tps65010.c
@@ -43,13 +43,12 @@
 /*-------------------------------------------------------------------------*/
 
 #define	DRIVER_VERSION	"2 May 2005"
-#define	DRIVER_NAME	(tps65010_driver.name)
+#define	DRIVER_NAME	(tps65010_driver.driver.name)
 
 MODULE_DESCRIPTION("TPS6501x Power Management Driver");
 MODULE_LICENSE("GPL");
 
 static unsigned short normal_i2c[] = { 0x48, /* 0x49, */ I2C_CLIENT_END };
-static unsigned short normal_i2c_range[] = { I2C_CLIENT_END };
 
 I2C_CLIENT_INSMOD;
 
@@ -100,7 +99,7 @@ struct tps65010 {
 	/* not currently tracking GPIO state */
 };
 
-#define	POWER_POLL_DELAY	msecs_to_jiffies(800)
+#define	POWER_POLL_DELAY	msecs_to_jiffies(5000)
 
 /*-------------------------------------------------------------------------*/
 
@@ -520,8 +519,11 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind)
 		goto fail1;
 	}
 
+	/* the IRQ is active low, but many gpio lines can't support that
+	 * so this driver can use falling-edge triggers instead.
+	 */
+	irqflags = IRQF_SAMPLE_RANDOM;
 #ifdef	CONFIG_ARM
-	irqflags = IRQF_SAMPLE_RANDOM | IRQF_TRIGGER_LOW;
 	if (machine_is_omap_h2()) {
 		tps->model = TPS65010;
 		omap_cfg_reg(W4_GPIO58);
@@ -543,8 +545,6 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind)
 
 		// FIXME set up this board's IRQ ...
 	}
-#else
-	irqflags = IRQF_SAMPLE_RANDOM;
 #endif
 
 	if (tps->irq > 0) {
-- 
cgit v0.10.2


From faf9b616325430422fa13fead88ca7843eb249d6 Mon Sep 17 00:00:00 2001
From: Hans de Goede <j.w.r.degoede@hhs.nl>
Date: Fri, 25 Aug 2006 10:24:20 +0200
Subject: [PATCH] hwmon: abituguru timeout fixes

This patch contains 2 sets of fixes for the abituguru:
 1) Much improved timeout handling, drastically reducing the amount of
    timeout errors on some motherboards
 2) Fix the exit paths in the bank1 sensor type detect code to always
    restore the original settings even on an error. Without this our
    special test settings could remain seriously confusing the system
    BIOS's setup menu.

Both are very much related and are must haves, to avoid messing up the
uguru CMOS settings.

Detailed changes:
- Much improved timeout / wait for status handling. Many thanks to Sunil
  Kumar, for all his testing, ideas and patches! The code now first busy
  waits, polling the uguru for the expected status as this usually
  succeeds pretty quickly (within 90 reads). To avoid unnecessary CPU burn
  in timeout conditions, the amount of busy waiting has been halved from
  previous versions (120 tries instead of 250). This is not a problem,
  because this version goes to sleep after 120 attempts for 1 jiffy and
  then tries again, it does this sleep and try again 5 times before
  finally giving up. This (almost?) completely removes the timeout errors
  some people have seen regularly. Apparently some older uguru versions
  sometimes are distracted for a (relatively) long time. This solves this.
- These timeout errors not only occur in the sending address part of
  reading the uguru but also in the wait for read state, so errors in
  this state are now handled as retryable just like send address state
  errors and are only logged and reported to userspace if 3 consecutive
  tries fail.
- Fix a very nasty bug in the bank1 sensor type detection code, where it
  would not restore the original settings in any of the error paths!
- Since not successfully restoring the original settings can seriously
  confuse the system BIOS (hang when entering the relevant setup menu),
  we now try restoring them 3 times before giving up.

Signed-off-by: Hans de Goede <j.w.r.degoede@hhs.nl>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/hwmon/abituguru.c b/drivers/hwmon/abituguru.c
index cc15c4f..35ad1b0 100644
--- a/drivers/hwmon/abituguru.c
+++ b/drivers/hwmon/abituguru.c
@@ -26,6 +26,7 @@
 #include <linux/jiffies.h>
 #include <linux/mutex.h>
 #include <linux/err.h>
+#include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
@@ -64,17 +65,17 @@
 #define ABIT_UGURU_IN_SENSOR			0
 #define ABIT_UGURU_TEMP_SENSOR			1
 #define ABIT_UGURU_NC				2
-/* Timeouts / Retries, if these turn out to need a lot of fiddling we could
-   convert them to params. */
-/* 250 was determined by trial and error, 200 works most of the time, but not
-   always. I assume this is cpu-speed independent, since the ISA-bus and not
-   the CPU should be the bottleneck. Note that 250 sometimes is still not
-   enough (only reported on AN7 mb) this is handled by a higher layer. */
-#define ABIT_UGURU_WAIT_TIMEOUT			250
+/* In many cases we need to wait for the uGuru to reach a certain status, most
+   of the time it will reach this status within 30 - 90 ISA reads, and thus we
+   can best busy wait. This define gives the total amount of reads to try. */
+#define ABIT_UGURU_WAIT_TIMEOUT			125
+/* However sometimes older versions of the uGuru seem to be distracted and they
+   do not respond for a long time. To handle this we sleep before each of the
+   last ABIT_UGURU_WAIT_TIMEOUT_SLEEP tries. */
+#define ABIT_UGURU_WAIT_TIMEOUT_SLEEP		5
 /* Normally all expected status in abituguru_ready, are reported after the
-   first read, but sometimes not and we need to poll, 5 polls was not enough
-   50 sofar is. */
-#define ABIT_UGURU_READY_TIMEOUT		50
+   first read, but sometimes not and we need to poll. */
+#define ABIT_UGURU_READY_TIMEOUT		5
 /* Maximum 3 retries on timedout reads/writes, delay 200 ms before retrying */
 #define ABIT_UGURU_MAX_RETRIES			3
 #define ABIT_UGURU_RETRY_DELAY			(HZ/5)
@@ -226,6 +227,10 @@ static int abituguru_wait(struct abituguru_data *data, u8 state)
 		timeout--;
 		if (timeout == 0)
 			return -EBUSY;
+		/* sleep a bit before our last few tries, see the comment on
+		   this where ABIT_UGURU_WAIT_TIMEOUT_SLEEP is defined. */
+		if (timeout <= ABIT_UGURU_WAIT_TIMEOUT_SLEEP)
+			msleep(0);
 	}
 	return 0;
 }
@@ -256,6 +261,7 @@ static int abituguru_ready(struct abituguru_data *data)
 			   "CMD reg does not hold 0xAC after ready command\n");
 			return -EIO;
 		}
+		msleep(0);
 	}
 
 	/* After this the ABIT_UGURU_DATA port should contain
@@ -268,6 +274,7 @@ static int abituguru_ready(struct abituguru_data *data)
 				"state != more input after ready command\n");
 			return -EIO;
 		}
+		msleep(0);
 	}
 
 	data->uguru_ready = 1;
@@ -331,7 +338,8 @@ static int abituguru_read(struct abituguru_data *data,
 	/* And read the data */
 	for (i = 0; i < count; i++) {
 		if (abituguru_wait(data, ABIT_UGURU_STATUS_READ)) {
-			ABIT_UGURU_DEBUG(1, "timeout exceeded waiting for "
+			ABIT_UGURU_DEBUG(retries ? 1 : 3,
+				"timeout exceeded waiting for "
 				"read state (bank: %d, sensor: %d)\n",
 				(int)bank_addr, (int)sensor_addr);
 			break;
@@ -350,7 +358,9 @@ static int abituguru_read(struct abituguru_data *data,
 static int abituguru_write(struct abituguru_data *data,
 	u8 bank_addr, u8 sensor_addr, u8 *buf, int count)
 {
-	int i;
+	/* We use the ready timeout as we have to wait for 0xAC just like the
+	   ready function */
+	int i, timeout = ABIT_UGURU_READY_TIMEOUT;
 
 	/* Send the address */
 	i = abituguru_send_address(data, bank_addr, sensor_addr,
@@ -370,7 +380,8 @@ static int abituguru_write(struct abituguru_data *data,
 	}
 
 	/* Now we need to wait till the chip is ready to be read again,
-	   don't ask why */
+	   so that we can read 0xAC as confirmation that our write has
+	   succeeded. */
 	if (abituguru_wait(data, ABIT_UGURU_STATUS_READ)) {
 		ABIT_UGURU_DEBUG(1, "timeout exceeded waiting for read state "
 			"after write (bank: %d, sensor: %d)\n", (int)bank_addr,
@@ -379,11 +390,15 @@ static int abituguru_write(struct abituguru_data *data,
 	}
 
 	/* Cmd port MUST be read now and should contain 0xAC */
-	if (inb_p(data->addr + ABIT_UGURU_CMD) != 0xAC) {
-		ABIT_UGURU_DEBUG(1, "CMD reg does not hold 0xAC after write "
-			"(bank: %d, sensor: %d)\n", (int)bank_addr,
-			(int)sensor_addr);
-		return -EIO;
+	while (inb_p(data->addr + ABIT_UGURU_CMD) != 0xAC) {
+		timeout--;
+		if (timeout == 0) {
+			ABIT_UGURU_DEBUG(1, "CMD reg does not hold 0xAC after "
+				"write (bank: %d, sensor: %d)\n",
+				(int)bank_addr, (int)sensor_addr);
+			return -EIO;
+		}
+		msleep(0);
 	}
 
 	/* Last put the chip back in ready state */
@@ -403,7 +418,7 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data,
 				   u8 sensor_addr)
 {
 	u8 val, buf[3];
-	int ret = ABIT_UGURU_NC;
+	int i, ret = -ENODEV; /* error is the most common used retval :| */
 
 	/* If overriden by the user return the user selected type */
 	if (bank1_types[sensor_addr] >= ABIT_UGURU_IN_SENSOR &&
@@ -439,7 +454,7 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data,
 	buf[2] = 250;
 	if (abituguru_write(data, ABIT_UGURU_SENSOR_BANK1 + 2, sensor_addr,
 			buf, 3) != 3)
-		return -ENODEV;
+		goto abituguru_detect_bank1_sensor_type_exit;
 	/* Now we need 20 ms to give the uguru time to read the sensors
 	   and raise a voltage alarm */
 	set_current_state(TASK_UNINTERRUPTIBLE);
@@ -447,21 +462,16 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data,
 	/* Check for alarm and check the alarm is a volt low alarm. */
 	if (abituguru_read(data, ABIT_UGURU_ALARM_BANK, 0, buf, 3,
 			ABIT_UGURU_MAX_RETRIES) != 3)
-		return -ENODEV;
+		goto abituguru_detect_bank1_sensor_type_exit;
 	if (buf[sensor_addr/8] & (0x01 << (sensor_addr % 8))) {
 		if (abituguru_read(data, ABIT_UGURU_SENSOR_BANK1 + 1,
 				sensor_addr, buf, 3,
 				ABIT_UGURU_MAX_RETRIES) != 3)
-			return -ENODEV;
+			goto abituguru_detect_bank1_sensor_type_exit;
 		if (buf[0] & ABIT_UGURU_VOLT_LOW_ALARM_FLAG) {
-			/* Restore original settings */
-			if (abituguru_write(data, ABIT_UGURU_SENSOR_BANK1 + 2,
-					sensor_addr,
-					data->bank1_settings[sensor_addr],
-					3) != 3)
-				return -ENODEV;
 			ABIT_UGURU_DEBUG(2, "  found volt sensor\n");
-			return ABIT_UGURU_IN_SENSOR;
+			ret = ABIT_UGURU_IN_SENSOR;
+			goto abituguru_detect_bank1_sensor_type_exit;
 		} else
 			ABIT_UGURU_DEBUG(2, "  alarm raised during volt "
 				"sensor test, but volt low flag not set\n");
@@ -477,7 +487,7 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data,
 	buf[2] = 10;
 	if (abituguru_write(data, ABIT_UGURU_SENSOR_BANK1 + 2, sensor_addr,
 			buf, 3) != 3)
-		return -ENODEV;
+		goto abituguru_detect_bank1_sensor_type_exit;
 	/* Now we need 50 ms to give the uguru time to read the sensors
 	   and raise a temp alarm */
 	set_current_state(TASK_UNINTERRUPTIBLE);
@@ -485,15 +495,16 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data,
 	/* Check for alarm and check the alarm is a temp high alarm. */
 	if (abituguru_read(data, ABIT_UGURU_ALARM_BANK, 0, buf, 3,
 			ABIT_UGURU_MAX_RETRIES) != 3)
-		return -ENODEV;
+		goto abituguru_detect_bank1_sensor_type_exit;
 	if (buf[sensor_addr/8] & (0x01 << (sensor_addr % 8))) {
 		if (abituguru_read(data, ABIT_UGURU_SENSOR_BANK1 + 1,
 				sensor_addr, buf, 3,
 				ABIT_UGURU_MAX_RETRIES) != 3)
-			return -ENODEV;
+			goto abituguru_detect_bank1_sensor_type_exit;
 		if (buf[0] & ABIT_UGURU_TEMP_HIGH_ALARM_FLAG) {
-			ret = ABIT_UGURU_TEMP_SENSOR;
 			ABIT_UGURU_DEBUG(2, "  found temp sensor\n");
+			ret = ABIT_UGURU_TEMP_SENSOR;
+			goto abituguru_detect_bank1_sensor_type_exit;
 		} else
 			ABIT_UGURU_DEBUG(2, "  alarm raised during temp "
 				"sensor test, but temp high flag not set\n");
@@ -501,11 +512,23 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data,
 		ABIT_UGURU_DEBUG(2, "  alarm not raised during temp sensor "
 			"test\n");
 
-	/* Restore original settings */
-	if (abituguru_write(data, ABIT_UGURU_SENSOR_BANK1 + 2, sensor_addr,
-			data->bank1_settings[sensor_addr], 3) != 3)
+	ret = ABIT_UGURU_NC;
+abituguru_detect_bank1_sensor_type_exit:
+	/* Restore original settings, failing here is really BAD, it has been
+	   reported that some BIOS-es hang when entering the uGuru menu with
+	   invalid settings present in the uGuru, so we try this 3 times. */
+	for (i = 0; i < 3; i++)
+		if (abituguru_write(data, ABIT_UGURU_SENSOR_BANK1 + 2,
+				sensor_addr, data->bank1_settings[sensor_addr],
+				3) == 3)
+			break;
+	if (i == 3) {
+		printk(KERN_ERR ABIT_UGURU_NAME
+			": Fatal error could not restore original settings. "
+			"This should never happen please report this to the "
+			"abituguru maintainer (see MAINTAINERS)\n");
 		return -ENODEV;
-
+	}
 	return ret;
 }
 
@@ -1305,7 +1328,7 @@ static struct abituguru_data *abituguru_update_device(struct device *dev)
 		data->update_timeouts = 0;
 LEAVE_UPDATE:
 		/* handle timeout condition */
-		if (err == -EBUSY) {
+		if (!success && (err == -EBUSY || err >= 0)) {
 			/* No overflow please */
 			if (data->update_timeouts < 255u)
 				data->update_timeouts++;
-- 
cgit v0.10.2


From 954c0b7cd5b9aaa11fb67a0c011fcb5e5897385a Mon Sep 17 00:00:00 2001
From: Daniel Ritz <daniel.ritz-ml@swissonline.ch>
Date: Tue, 22 Aug 2006 07:29:08 -0700
Subject: [PATCH] PCI: use PCBIOS as last fallback

there was a change in 2.6.17 which affected the order in which the PCI
access methods are probed.  this gives regressions on some machines with
broken BIOS.  the problem is that PCBIOS sometimes reports last bus wrong,
leaving cardbus non-functional.  previously those systems worked fine with
direct access.

The patch changes the PCI init code to have PCBIOS as last fallback, yet
the PCBIOS code still has to run first to set pcibios_last_bus to the value
reported by the BIOS.  this is needed in case legacy PCI probing
(arch/i386/pci/legacy.c) is used to detect peer busses.  using direct
access if available fixes the cardbus problems.

Signed-off-by: Daniel Ritz <daniel.ritz@gmx.ch>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/arch/i386/pci/init.c b/arch/i386/pci/init.c
index c7650a7..51087a9 100644
--- a/arch/i386/pci/init.c
+++ b/arch/i386/pci/init.c
@@ -14,8 +14,12 @@ static __init int pci_access_init(void)
 #ifdef CONFIG_PCI_BIOS
 	pci_pcbios_init();
 #endif
-	if (raw_pci_ops)
-		return 0;
+	/*
+	 * don't check for raw_pci_ops here because we want pcbios as last
+	 * fallback, yet it's needed to run first to set pcibios_last_bus
+	 * in case legacy PCI probing is used. otherwise detecting peer busses
+	 * fails.
+	 */
 #ifdef CONFIG_PCI_DIRECT
 	pci_direct_init();
 #endif
-- 
cgit v0.10.2


From fd4dc27cff15f77cde218613baa36b855c85ad9f Mon Sep 17 00:00:00 2001
From: Daniel Ritz <daniel.ritz-ml@swissonline.ch>
Date: Tue, 22 Aug 2006 07:29:09 -0700
Subject: [PATCH] PCI: i386 mmconfig: don't forget bus number when setting
 fallback_slots bits

On i386 PCI mmconfig forgets the bus number when setting the fallback_slots
bits which means fallback to conf1 only works for bus 0.

Signed-off-by: Daniel Ritz <daniel.ritz@gmx.ch>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
index e545b09..972180f 100644
--- a/arch/i386/pci/mmconfig.c
+++ b/arch/i386/pci/mmconfig.c
@@ -178,7 +178,7 @@ static __init void unreachable_devices(void)
 				pci_exp_set_dev_base(addr, k, PCI_DEVFN(i, 0));
 			if (addr == 0 ||
 			    readl((u32 __iomem *)mmcfg_virt_addr) != val1) {
-				set_bit(i, fallback_slots);
+				set_bit(i + 32*k, fallback_slots);
 				printk(KERN_NOTICE
 			"PCI: No mmconfig possible on %x:%x\n", k, i);
 			}
-- 
cgit v0.10.2


From 65ae4dddbb56c7415c31e9aae0b3811cb583bbea Mon Sep 17 00:00:00 2001
From: Daniel Ritz <daniel.ritz-ml@swissonline.ch>
Date: Tue, 22 Aug 2006 07:29:10 -0700
Subject: [PATCH] PCI: fix ICH6 quirks

- add the ICH6(R) LPC to the ICH6 ACPI quirks.  currently only the ICH6-M
  is handled.  [ PCI_DEVICE_ID_INTEL_ICH6_1 is the ICH6-M LPC, ICH6_0 is
  the ICH6(R) ]

- remove the wrong quirk calling asus_hides_smbus_lpc() for ICH6.  the
  register modified in asus_hides_smbus_lpc() has a different meaning in
  ICH6.

Signed-off-by: Daniel Ritz <daniel.ritz@gmx.ch>
Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 04618d4..7317742 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -438,6 +438,7 @@ static void __devinit quirk_ich6_lpc_acpi(struct pci_dev *dev)
 	pci_read_config_dword(dev, 0x48, &region);
 	quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES+1, "ICH6 GPIO");
 }
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH6_0, quirk_ich6_lpc_acpi );
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH6_1, quirk_ich6_lpc_acpi );
 
 /*
@@ -1091,7 +1092,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801CA_0,	asu
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801CA_12,	asus_hides_smbus_lpc );
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801DB_12,	asus_hides_smbus_lpc );
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801EB_0,	asus_hides_smbus_lpc );
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ICH6_1,	asus_hides_smbus_lpc );
 
 static void __init asus_hides_smbus_lpc_ich6(struct pci_dev *dev)
 {
-- 
cgit v0.10.2


From cc702c2c5e5ab51e1b0d3386d447e7b1772879ee Mon Sep 17 00:00:00 2001
From: Scott Murray <scottm@somanetworks.com>
Date: Tue, 22 Aug 2006 19:55:57 -0400
Subject: [PATCH] CPCI hotplug: fix resource assignment

Here is a patch against the CPCI hotplug core to fix up PCI resource
assignment such that things will actually work when a hot inserted
device is enabled.  I mentioned this patch to you way back in April at
ELC, but am only now out from under things enough to clean it up and
submit it.  I've basically cribbed the corresponding code from
shpchp_pci.c, so there are no big surprises.  If it's still possible, I
wouldn't mind this going into 2.6.18, but it wouldn't be the end of the
world if it went into 2.6.19.

Signed-off-by: Scott Murray <scottm@somanetworks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/pci/hotplug/cpci_hotplug_pci.c b/drivers/pci/hotplug/cpci_hotplug_pci.c
index 02be74c..4afcaff 100644
--- a/drivers/pci/hotplug/cpci_hotplug_pci.c
+++ b/drivers/pci/hotplug/cpci_hotplug_pci.c
@@ -254,8 +254,8 @@ int cpci_led_off(struct slot* slot)
 
 int cpci_configure_slot(struct slot* slot)
 {
-	unsigned char busnr;
-	struct pci_bus *child;
+	struct pci_bus *parent;
+	int fn;
 
 	dbg("%s - enter", __FUNCTION__);
 
@@ -276,23 +276,53 @@ int cpci_configure_slot(struct slot* slot)
 		 */
 		n = pci_scan_slot(slot->bus, slot->devfn);
 		dbg("%s: pci_scan_slot returned %d", __FUNCTION__, n);
-		if (n > 0)
-			pci_bus_add_devices(slot->bus);
 		slot->dev = pci_get_slot(slot->bus, slot->devfn);
 		if (slot->dev == NULL) {
 			err("Could not find PCI device for slot %02x", slot->number);
-			return 1;
+			return -ENODEV;
 		}
 	}
-
-	if (slot->dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
-		pci_read_config_byte(slot->dev, PCI_SECONDARY_BUS, &busnr);
-		child = pci_add_new_bus(slot->dev->bus, slot->dev, busnr);
-		pci_do_scan_bus(child);
-		pci_bus_size_bridges(child);
+	parent = slot->dev->bus;
+
+	for (fn = 0; fn < 8; fn++) {
+		struct pci_dev *dev;
+
+		dev = pci_get_slot(parent, PCI_DEVFN(PCI_SLOT(slot->devfn), fn));
+		if (!dev)
+			continue;
+		if ((dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) ||
+		    (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) {
+			/* Find an unused bus number for the new bridge */
+			struct pci_bus *child;
+			unsigned char busnr, start = parent->secondary;
+			unsigned char end = parent->subordinate;
+
+			for (busnr = start; busnr <= end; busnr++) {
+				if (!pci_find_bus(pci_domain_nr(parent),
+						  busnr))
+					break;
+			}
+			if (busnr >= end) {
+				err("No free bus for hot-added bridge\n");
+				pci_dev_put(dev);
+				continue;
+			}
+			child = pci_add_new_bus(parent, dev, busnr);
+			if (!child) {
+				err("Cannot add new bus for %s\n",
+				    pci_name(dev));
+				pci_dev_put(dev);
+				continue;
+			}
+			child->subordinate = pci_do_scan_bus(child);
+			pci_bus_size_bridges(child);
+		}
+		pci_dev_put(dev);
 	}
 
-	pci_bus_assign_resources(slot->dev->bus);
+	pci_bus_assign_resources(parent);
+	pci_bus_add_devices(parent);
+	pci_enable_bridges(parent);
 
 	dbg("%s - exit", __FUNCTION__);
 	return 0;
-- 
cgit v0.10.2


From 39ba487fe22a63b3df7c543c82d01db0f0fed700 Mon Sep 17 00:00:00 2001
From: Henrik Kretzschmar <henne@nachtwindheim.de>
Date: Tue, 15 Aug 2006 10:57:16 +0200
Subject: [PATCH] PCI: kerneldoc correction in pci-driver

Removes an unused kerneldoc entry from pci_match_device and
puts the others into the correct order.

Signed-off-by: Henrik Kretzschmar <henne@nachtwindheim.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 10e1a90..474e9cd 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -139,9 +139,8 @@ const struct pci_device_id *pci_match_id(const struct pci_device_id *ids,
 /**
  * pci_match_device - Tell if a PCI device structure has a matching
  *                    PCI device id structure
- * @ids: array of PCI device id structures to search in
- * @dev: the PCI device structure to match against
  * @drv: the PCI driver to match against
+ * @dev: the PCI device structure to match against
  *
  * Used by a driver to check whether a PCI device present in the
  * system is in its list of supported devices.  Returns the matching
-- 
cgit v0.10.2


From b93b58eea81beacac55770dd39e34fbffdc36000 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 14 Aug 2006 17:32:51 -0700
Subject: [PATCH] USB: fix bug in cypress_cy7c63.c driver

This was pointed out by Adrian Bunk <bunk@stusta.de>, as found by the Coverity Checker.

Cc: Adrian Bunk <bunk@stusta.de>
Cc: Oliver Bock <o.bock@fh-wolfenbuettel.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/misc/cypress_cy7c63.c b/drivers/usb/misc/cypress_cy7c63.c
index a4062a6..9c46746 100644
--- a/drivers/usb/misc/cypress_cy7c63.c
+++ b/drivers/usb/misc/cypress_cy7c63.c
@@ -208,7 +208,7 @@ static int cypress_probe(struct usb_interface *interface,
 	/* allocate memory for our device state and initialize it */
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 	if (dev == NULL) {
-		dev_err(&dev->udev->dev, "Out of memory!\n");
+		dev_err(&interface->dev, "Out of memory!\n");
 		goto error;
 	}
 
-- 
cgit v0.10.2


From be729523369ec7825bde4a0e774ee7932e9d3dff Mon Sep 17 00:00:00 2001
From: Tomasz Kazmierczak <tomek.fizyk@op.pl>
Date: Fri, 18 Aug 2006 23:43:29 +0200
Subject: [PATCH] USB: pl2303: removed support for OTi's DKU-5 clone cable

This patch removes support for a clone of the Nokia DKU-5 cable made by Ours
Technology Inc, as it turned out that the cable does not use the pl2303
chip, but the OTI-6858 chip, which is not compatible with the pl2303.

Signed-off-by: Tomasz Kazmierczak <tomek.fizyk@op.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index efbbc0a..65e4d04 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -79,7 +79,6 @@ static struct usb_device_id id_table [] = {
 	{ USB_DEVICE(SAGEM_VENDOR_ID, SAGEM_PRODUCT_ID) },
 	{ USB_DEVICE(LEADTEK_VENDOR_ID, LEADTEK_9531_PRODUCT_ID) },
 	{ USB_DEVICE(SPEEDDRAGON_VENDOR_ID, SPEEDDRAGON_PRODUCT_ID) },
-	{ USB_DEVICE(OTI_VENDOR_ID, OTI_PRODUCT_ID) },
 	{ USB_DEVICE(DATAPILOT_U2_VENDOR_ID, DATAPILOT_U2_PRODUCT_ID) },
 	{ USB_DEVICE(BELKIN_VENDOR_ID, BELKIN_PRODUCT_ID) },
 	{ }					/* Terminating entry */
diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h
index a692ac6..55195e7 100644
--- a/drivers/usb/serial/pl2303.h
+++ b/drivers/usb/serial/pl2303.h
@@ -82,10 +82,6 @@
 #define SPEEDDRAGON_VENDOR_ID	0x0e55
 #define SPEEDDRAGON_PRODUCT_ID	0x110b
 
-/* Ours Technology Inc DKU-5 clone, chipset: Prolific Technology Inc */
-#define OTI_VENDOR_ID	0x0ea0
-#define OTI_PRODUCT_ID	0x6858
-
 /* DATAPILOT Universal-2 Phone Cable */
 #define DATAPILOT_U2_VENDOR_ID	0x0731
 #define DATAPILOT_U2_PRODUCT_ID	0x2003
-- 
cgit v0.10.2


From 94918ff68a7c78fdd3241d13e18444ef80b8ff4a Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 14 Aug 2006 11:40:46 -0400
Subject: [PATCH] unusual_devs update for UCR-61S2B

The existing unusual_devs entry for the UCR-61S2B appears to have too
wide a revision range.  It matches at least one device that doesn't
respond to the initialization sequence.  Perhaps the sequence needs to
be updated, or perhaps something else can be done.  For now, this patch
(as764) restricts the range to include only the revision mentioned in
the original comment.

This resolves (for now!) Bugzilla entry #6950.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index fd158e0..4a803d6 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -1261,7 +1261,7 @@ UNUSUAL_DEV(  0x0fce, 0xd008, 0x0000, 0x0000,
  * Tested on hardware version 1.10.
  * Entry is needed only for the initializer function override.
  */
-UNUSUAL_DEV(  0x1019, 0x0c55, 0x0000, 0x9999,
+UNUSUAL_DEV(  0x1019, 0x0c55, 0x0110, 0x0110,
 		"Desknote",
 		"UCR-61S2B",
 		US_SC_DEVICE, US_PR_DEVICE, usb_stor_ucr61s2b_init,
-- 
cgit v0.10.2


From a23b423ec0573ead85e2c92f2a5df5d4bf47e26b Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Fri, 25 Aug 2006 00:46:02 -0700
Subject: [SPARC]: enabling of the 2nd CPU in 2.6.18-rc4

smp_setup_cpu_possible_map() needs to run after paging_init()
so that the in-kernel device tree is setup.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/kernel/setup.c b/arch/sparc/kernel/setup.c
index 35488d6..0251cab 100644
--- a/arch/sparc/kernel/setup.c
+++ b/arch/sparc/kernel/setup.c
@@ -348,9 +348,9 @@ void __init setup_arch(char **cmdline_p)
 	init_mm.context = (unsigned long) NO_CONTEXT;
 	init_task.thread.kregs = &fake_swapper_regs;
 
-	smp_setup_cpu_possible_map();
-
 	paging_init();
+
+	smp_setup_cpu_possible_map();
 }
 
 static int __init set_preferred_console(void)
-- 
cgit v0.10.2


From 5fec811e99bb6f537e67b6bcbe9fe6505aa8a114 Mon Sep 17 00:00:00 2001
From: Krzysztof Helt <krzysztof.h1@wp.pl>
Date: Fri, 25 Aug 2006 16:22:21 -0700
Subject: [SPARC]: Small smp cleanup.

It moves the smp_processors_ready variable to sun4d_smp.c only.

Signed-off-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/kernel/smp.c b/arch/sparc/kernel/smp.c
index e311ade..276f228 100644
--- a/arch/sparc/kernel/smp.c
+++ b/arch/sparc/kernel/smp.c
@@ -34,7 +34,6 @@
 #include <asm/tlbflush.h>
 #include <asm/cpudata.h>
 
-volatile int smp_processors_ready = 0;
 int smp_num_cpus = 1;
 volatile unsigned long cpu_callin_map[NR_CPUS] __initdata = {0,};
 unsigned char boot_cpu_id = 0;
diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c
index ba843f6..3ff4edd 100644
--- a/arch/sparc/kernel/sun4d_smp.c
+++ b/arch/sparc/kernel/sun4d_smp.c
@@ -42,7 +42,7 @@ extern ctxd_t *srmmu_ctx_table_phys;
 
 extern void calibrate_delay(void);
 
-extern volatile int smp_processors_ready;
+static volatile int smp_processors_ready = 0;
 static int smp_highest_cpu;
 extern volatile unsigned long cpu_callin_map[NR_CPUS];
 extern cpuinfo_sparc cpu_data[NR_CPUS];
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c
index 3b32096..7d4a649 100644
--- a/arch/sparc/kernel/sun4m_smp.c
+++ b/arch/sparc/kernel/sun4m_smp.c
@@ -39,7 +39,6 @@ extern ctxd_t *srmmu_ctx_table_phys;
 
 extern void calibrate_delay(void);
 
-extern volatile int smp_processors_ready;
 extern volatile unsigned long cpu_callin_map[NR_CPUS];
 extern unsigned char boot_cpu_id;
 
@@ -217,7 +216,6 @@ void __init smp4m_smp_done(void)
 	}
 
 	/* Ok, they are spinning and ready to go. */
-	smp_processors_ready = 1;
 }
 
 /* At each hardware IRQ, we get this called to forward IRQ reception
-- 
cgit v0.10.2


From 81a42d298d8bd1b96be4bd459494f25fdd99b594 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Fri, 25 Aug 2006 15:58:57 -0700
Subject: [DISKLABEL] SUN: Fix signed int usage for sector count

The current sun disklabel code uses a signed int for the sector count.
When partitions larger than 1 TB are used, the cast to a sector_t causes
the partition sizes to be invalid:

 # cat /proc/partitions | grep sdan
   66   112 2146435072 sdan
   66   115 9223372036853660736 sdan3
   66   120 9223372036853660736 sdan8

This patch switches the sector count to an unsigned int to fix this.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c
index abe91ca..0a5927c 100644
--- a/fs/partitions/sun.c
+++ b/fs/partitions/sun.c
@@ -74,7 +74,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev)
 	spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect);
 	for (i = 0; i < 8; i++, p++) {
 		unsigned long st_sector;
-		int num_sectors;
+		unsigned int num_sectors;
 
 		st_sector = be32_to_cpu(p->start_cylinder) * spc;
 		num_sectors = be32_to_cpu(p->num_sectors);
-- 
cgit v0.10.2


From 897522ea1c20691b6a65f32f03ae4e77e508b31c Mon Sep 17 00:00:00 2001
From: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Date: Fri, 25 Aug 2006 00:52:06 -0700
Subject: [CONNECTOR]: Add userspace example code into Documentation/connector/

I was asked several times to include userspace example code into
Documentation, so if there is no policy against it, consider attached patch
for 2.6.18. This program works with included Documentation/connector/cn_test.c
connector module.

Signed-off-by: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/Documentation/connector/ucon.c b/Documentation/connector/ucon.c
new file mode 100644
index 0000000..d738cde
--- /dev/null
+++ b/Documentation/connector/ucon.c
@@ -0,0 +1,206 @@
+/*
+ * 	ucon.c
+ *
+ * Copyright (c) 2004+ Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <asm/types.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/poll.h>
+
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include <arpa/inet.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <time.h>
+
+#include <linux/connector.h>
+
+#define DEBUG
+#define NETLINK_CONNECTOR 	11
+
+#ifdef DEBUG
+#define ulog(f, a...) fprintf(stdout, f, ##a)
+#else
+#define ulog(f, a...) do {} while (0)
+#endif
+
+static int need_exit;
+static __u32 seq;
+
+static int netlink_send(int s, struct cn_msg *msg)
+{
+	struct nlmsghdr *nlh;
+	unsigned int size;
+	int err;
+	char buf[128];
+	struct cn_msg *m;
+
+	size = NLMSG_SPACE(sizeof(struct cn_msg) + msg->len);
+
+	nlh = (struct nlmsghdr *)buf;
+	nlh->nlmsg_seq = seq++;
+	nlh->nlmsg_pid = getpid();
+	nlh->nlmsg_type = NLMSG_DONE;
+	nlh->nlmsg_len = NLMSG_LENGTH(size - sizeof(*nlh));
+	nlh->nlmsg_flags = 0;
+
+	m = NLMSG_DATA(nlh);
+#if 0
+	ulog("%s: [%08x.%08x] len=%u, seq=%u, ack=%u.\n",
+	       __func__, msg->id.idx, msg->id.val, msg->len, msg->seq, msg->ack);
+#endif
+	memcpy(m, msg, sizeof(*m) + msg->len);
+
+	err = send(s, nlh, size, 0);
+	if (err == -1)
+		ulog("Failed to send: %s [%d].\n",
+			strerror(errno), errno);
+
+	return err;
+}
+
+int main(int argc, char *argv[])
+{
+	int s;
+	char buf[1024];
+	int len;
+	struct nlmsghdr *reply;
+	struct sockaddr_nl l_local;
+	struct cn_msg *data;
+	FILE *out;
+	time_t tm;
+	struct pollfd pfd;
+
+	if (argc < 2)
+		out = stdout;
+	else {
+		out = fopen(argv[1], "a+");
+		if (!out) {
+			ulog("Unable to open %s for writing: %s\n",
+				argv[1], strerror(errno));
+			out = stdout;
+		}
+	}
+
+	memset(buf, 0, sizeof(buf));
+
+	s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+	if (s == -1) {
+		perror("socket");
+		return -1;
+	}
+
+	l_local.nl_family = AF_NETLINK;
+	l_local.nl_groups = 0x123; /* bitmask of requested groups */
+	l_local.nl_pid = 0;
+
+	if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) {
+		perror("bind");
+		close(s);
+		return -1;
+	}
+
+#if 0
+	{
+		int on = 0x57; /* Additional group number */
+		setsockopt(s, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &on, sizeof(on));
+	}
+#endif
+	if (0) {
+		int i, j;
+
+		memset(buf, 0, sizeof(buf));
+
+		data = (struct cn_msg *)buf;
+
+		data->id.idx = 0x123;
+		data->id.val = 0x456;
+		data->seq = seq++;
+		data->ack = 0;
+		data->len = 0;
+
+		for (j=0; j<10; ++j) {
+			for (i=0; i<1000; ++i) {
+				len = netlink_send(s, data);
+			}
+
+			ulog("%d messages have been sent to %08x.%08x.\n", i, data->id.idx, data->id.val);
+		}
+
+		return 0;
+	}
+
+
+	pfd.fd = s;
+
+	while (!need_exit) {
+		pfd.events = POLLIN;
+		pfd.revents = 0;
+		switch (poll(&pfd, 1, -1)) {
+			case 0:
+				need_exit = 1;
+				break;
+			case -1:
+				if (errno != EINTR) {
+					need_exit = 1;
+					break;
+				}
+				continue;
+		}
+		if (need_exit)
+			break;
+
+		memset(buf, 0, sizeof(buf));
+		len = recv(s, buf, sizeof(buf), 0);
+		if (len == -1) {
+			perror("recv buf");
+			close(s);
+			return -1;
+		}
+		reply = (struct nlmsghdr *)buf;
+
+		switch (reply->nlmsg_type) {
+		case NLMSG_ERROR:
+			fprintf(out, "Error message received.\n");
+			fflush(out);
+			break;
+		case NLMSG_DONE:
+			data = (struct cn_msg *)NLMSG_DATA(reply);
+
+			time(&tm);
+			fprintf(out, "%.24s : [%x.%x] [%08u.%08u].\n",
+				ctime(&tm), data->id.idx, data->id.val, data->seq, data->ack);
+			fflush(out);
+			break;
+		default:
+			break;
+		}
+	}
+
+	close(s);
+	return 0;
+}
-- 
cgit v0.10.2


From 59eed279c5daa88d95e429782ddb8ef87e52c44b Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Fri, 25 Aug 2006 15:55:43 -0700
Subject: [IPV6]: Segmentation offload not set correctly on TCP children

TCP over IPV6 would incorrectly inherit the GSO settings.
This would cause the kernel to send TCP Segmentation Offload packets for
IPV6 data to devices that can't handle it. It caused the sky2 driver
to lock up (http://bugzilla.kernel.org/show_bug.cgi?id=7050)
and the e1000 to generate bogus packets. I can't blame the
hardware for gagging if the upper layers feed it garbage.

This was a new bug in 2.6.18 introduced with GSO support.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b843a65..802a1a6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -944,7 +944,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	 * comment in that function for the gory details. -acme
 	 */
 
-	sk->sk_gso_type = SKB_GSO_TCPV6;
+	newsk->sk_gso_type = SKB_GSO_TCPV6;
 	__ip6_dst_store(newsk, dst, NULL);
 
 	newtcp6sk = (struct tcp6_sock *)newsk;
-- 
cgit v0.10.2


From f3166c07175c1639687288006aeabed363a921f3 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Sat, 26 Aug 2006 19:01:03 -0700
Subject: [DCCP]: Fix typo

This fixes a small typo in net/dccp/ccids/lib/packet_history.c

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index ad98d6a..6739be1 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -1,5 +1,5 @@
 /*
- *  net/dccp/packet_history.h
+ *  net/dccp/packet_history.c
  *
  *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
  *
-- 
cgit v0.10.2


From e6bccd357343e98db9e1fd0d487f4f924e1a7921 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Sat, 26 Aug 2006 19:01:30 -0700
Subject: [DCCP]: Update contact details and copyright

Just updating copyright and contacts

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/CREDITS b/CREDITS
index 29be6d1..0fe904e 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2209,7 +2209,7 @@ S: (address available on request)
 S: USA
 
 N: Ian McDonald
-E: iam4@cs.waikato.ac.nz
+E: ian.mcdonald@jandi.co.nz
 E: imcdnzl@gmail.com
 W: http://wand.net.nz/~iam4
 W: http://imcdnzl.blogspot.com
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index c39bff7..0f85970 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -2,7 +2,7 @@
  *  net/dccp/ccids/ccid3.c
  *
  *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005-6 Ian McDonald <imcdnzl@gmail.com>
+ *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *
  *  An implementation of the DCCP protocol
  *
@@ -1230,7 +1230,7 @@ static __exit void ccid3_module_exit(void)
 }
 module_exit(ccid3_module_exit);
 
-MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
+MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
 	      "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
 MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
 MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 5ade4f6..22cb9f8 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -1,13 +1,13 @@
 /*
  *  net/dccp/ccids/ccid3.h
  *
- *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
  *
  *  An implementation of the DCCP protocol
  *
  *  This code has been developed by the University of Waikato WAND
  *  research group. For further information please see http://www.wand.net.nz/
- *  or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ *  or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
  *
  *  This code also uses code from Lulea University, rereleased as GPL by its
  *  authors:
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 5d7b7d8..b93d9fc 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -2,7 +2,7 @@
  *  net/dccp/ccids/lib/loss_interval.c
  *
  *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *
  *  This program is free software; you can redistribute it and/or modify
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 43bf782..dcb370a 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -4,7 +4,7 @@
  *  net/dccp/ccids/lib/loss_interval.h
  *
  *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *
  *  This program is free software; you can redistribute it and/or modify it
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 6739be1..7b6b03e 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -1,13 +1,13 @@
 /*
  *  net/dccp/packet_history.c
  *
- *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
  *
  *  An implementation of the DCCP protocol
  *
  *  This code has been developed by the University of Waikato WAND
  *  research group. For further information please see http://www.wand.net.nz/
- *  or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ *  or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
  *
  *  This code also uses code from Lulea University, rereleased as GPL by its
  *  authors:
@@ -391,7 +391,7 @@ void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list)
 
 EXPORT_SYMBOL_GPL(dccp_tx_hist_purge);
 
-MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
+MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
 	      "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
 MODULE_DESCRIPTION("DCCP TFRC library");
 MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 673c209..27c4309 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -1,13 +1,13 @@
 /*
  *  net/dccp/packet_history.h
  *
- *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
  *
  *  An implementation of the DCCP protocol
  *
  *  This code has been developed by the University of Waikato WAND
  *  research group. For further information please see http://www.wand.net.nz/
- *  or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ *  or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
  *
  *  This code also uses code from Lulea University, rereleased as GPL by its
  *  authors:
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index 130c4c4..45f30f5 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -4,7 +4,7 @@
  *  net/dccp/ccids/lib/tfrc.h
  *
  *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
  *
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 4fd2ebe..44076e0 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -2,7 +2,7 @@
  *  net/dccp/ccids/lib/tfrc_equation.c
  *
  *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
  *
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index d00a2f4..b8931d3 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -5,7 +5,7 @@
  *
  *  An implementation of the DCCP protocol
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *
  *	This program is free software; you can redistribute it and/or modify it
  *	under the terms of the GNU General Public License version 2 as
diff --git a/net/dccp/options.c b/net/dccp/options.c
index daf72bb..07a3469 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -4,7 +4,7 @@
  *  An implementation of the DCCP protocol
  *  Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
- *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *
  *      This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
-- 
cgit v0.10.2


From 837d107cd101fbf734e9ea2bbb5c7336a329e432 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Sat, 26 Aug 2006 19:06:42 -0700
Subject: [DCCP]: Introduces follows48 function

This adds a new function to see if two sequence numbers follow each
other.

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index b8931d3..a5c5475 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -81,6 +81,14 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
 	return after48(seq1, seq2) ? seq1 : seq2;
 }
 
+/* is seq1 the next seqno after seq2? seqnos are 48 bit and wrap around */
+static inline int follows48(const u64 seq1, const u64 seq2)
+{
+	const u64 delta = (seq1 - seq2) & 0xFFFFFFFFFFFFULL; /* mod 2^48 */
+
+	return delta == 1;
+}
+
 enum {
 	DCCP_MIB_NUM = 0,
 	DCCP_MIB_ACTIVEOPENS,			/* ActiveOpens */
-- 
cgit v0.10.2


From 80193aee18bc862e284ba18504f3a3e14706a997 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Sat, 26 Aug 2006 19:07:36 -0700
Subject: [DCCP]: Introduce dccp_rx_hist_find_entry

This adds a new function dccp_rx_hist_find_entry.

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 7b6b03e..420c60f 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -365,6 +365,25 @@ struct dccp_tx_hist_entry *
 
 EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry);
 
+/* Look up seq in the rx history; on a hit store its ccval and return 1 */
+int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
+   u8 *ccval)
+{
+	struct dccp_rx_hist_entry *cur;
+
+	list_for_each_entry(cur, list, dccphrx_node) {
+		if (cur->dccphrx_seqno != seq)
+			continue;
+		/* first match wins */
+		*ccval = cur->dccphrx_ccval;
+		return 1;
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry);
+
 void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
 			      struct list_head *list,
 			      struct dccp_tx_hist_entry *packet)
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 27c4309..aea9c5d 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -106,6 +106,8 @@ static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist,
 extern struct dccp_tx_hist_entry *
 			dccp_tx_hist_find_entry(const struct list_head *list,
 						const u64 seq);
+extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
+   u8 *ccval);
 
 static inline void dccp_tx_hist_add_entry(struct list_head *list,
 					  struct dccp_tx_hist_entry *entry)
-- 
cgit v0.10.2


From 3a13813e6effcfad5910d47b15b724621b50b878 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Sat, 26 Aug 2006 20:28:30 -0700
Subject: [BRIDGE] netfilter: memory corruption fix

The bridge-netfilter code will overwrite memory if there is not
headroom in the skb to save the header.  This first showed up when
using Xen with sky2 driver that doesn't allocate the extra space.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 10c13dc..427c67f 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -48,15 +48,25 @@ enum nf_br_hook_priorities {
 
 /* Only used in br_forward.c */
 static inline
-void nf_bridge_maybe_copy_header(struct sk_buff *skb)
+int nf_bridge_maybe_copy_header(struct sk_buff *skb)
 {
+	int err;
+
 	if (skb->nf_bridge) {
 		if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
+			err = skb_cow(skb, 18);
+			if (err)
+				return err;
 			memcpy(skb->data - 18, skb->nf_bridge->data, 18);
 			skb_push(skb, 4);
-		} else
+		} else {
+			err = skb_cow(skb, 16);
+			if (err)
+				return err;
 			memcpy(skb->data - 16, skb->nf_bridge->data, 16);
+		}
 	}
+	return 0;
 }
 
 /* This is called by the IP fragmenting code and it ensures there is
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 6ccd32b..864fbbc 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -40,11 +40,15 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
 	else {
 #ifdef CONFIG_BRIDGE_NETFILTER
 		/* ip_refrag calls ip_fragment, doesn't copy the MAC header. */
-		nf_bridge_maybe_copy_header(skb);
+		if (nf_bridge_maybe_copy_header(skb))
+			kfree_skb(skb);
+		else
 #endif
-		skb_push(skb, ETH_HLEN);
+		{
+			skb_push(skb, ETH_HLEN);
 
-		dev_queue_xmit(skb);
+			dev_queue_xmit(skb);
+		}
 	}
 
 	return 0;
-- 
cgit v0.10.2


From 66a377c5041e1e399633153c8b500d457281e7c1 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Sat, 26 Aug 2006 23:40:50 -0700
Subject: [DCCP]: Fix CCID3

This fixes CCID3 to give much closer performance to RFC4342.

CCID3 is meant to alter sending rate based on RTT and loss.

The performance was verified against:
http://wand.net.nz/~perry/max_download.php

For example I tested with netem and had the following parameters:
Delayed Acks 1, MSS 256 bytes, RTT 105 ms, packet loss 5%.

This gives a theoretical speed of 71.9 Kbits/s. I measured across three
runs with this patch set and got 70.1 Kbits/s. Without this patchset the
average was 232 Kbits/s which means Linux can't be used for CCID3 research
properly.

I also tested with netem turned off so box just acting as router with 1.2
msec RTT. The performance with this is the same with or without the patch
at around 30 Mbit/s.

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 0f85970..090bc39 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -342,6 +342,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk,
 		new_packet->dccphtx_ccval =
 			DCCP_SKB_CB(skb)->dccpd_ccval =
 				hctx->ccid3hctx_last_win_count;
+		timeval_add_usecs(&hctx->ccid3hctx_t_nom,
+				  hctx->ccid3hctx_t_ipi);
 	}
 out:
 	return rc;
@@ -413,7 +415,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
 	case TFRC_SSTATE_NO_FBACK:
 	case TFRC_SSTATE_FBACK:
 		if (len > 0) {
-			hctx->ccid3hctx_t_nom = now;
+			timeval_sub_usecs(&hctx->ccid3hctx_t_nom,
+				  hctx->ccid3hctx_t_ipi);
 			ccid3_calc_new_t_ipi(hctx);
 			ccid3_calc_new_delta(hctx);
 			timeval_add_usecs(&hctx->ccid3hctx_t_nom,
@@ -757,8 +760,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
 	}
 
 	hcrx->ccid3hcrx_tstamp_last_feedback = now;
-	hcrx->ccid3hcrx_last_counter	     = packet->dccphrx_ccval;
-	hcrx->ccid3hcrx_seqno_last_counter   = packet->dccphrx_seqno;
+	hcrx->ccid3hcrx_ccval_last_counter   = packet->dccphrx_ccval;
 	hcrx->ccid3hcrx_bytes_recv	     = 0;
 
 	/* Convert to multiples of 10us */
@@ -782,7 +784,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 	if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
 		return 0;
 
-	DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter;
+	DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter;
 
 	if (dccp_packet_without_ack(skb))
 		return 0;
@@ -854,6 +856,11 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
 		interval = 1;
 	}
 found:
+	if (!tail) {
+		LIMIT_NETDEBUG(KERN_WARNING "%s: tail is null\n",
+		   __FUNCTION__);
+		return ~0;
+	}
 	rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval;
 	ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
 		       dccp_role(sk), sk, rtt);
@@ -864,9 +871,20 @@ found:
 	delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
 	x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta);
 
+	if (x_recv == 0)
+		x_recv = hcrx->ccid3hcrx_x_recv;
+
 	tmp1 = (u64)x_recv * (u64)rtt;
 	do_div(tmp1,10000000);
 	tmp2 = (u32)tmp1;
+
+	if (!tmp2) {
+		LIMIT_NETDEBUG(KERN_WARNING "tmp2 = 0 "
+		   "%s: x_recv = %u, rtt =%u\n",
+		   __FUNCTION__, x_recv, rtt);
+		return ~0;
+	}
+
 	fval = (hcrx->ccid3hcrx_s * 100000) / tmp2;
 	/* do not alter order above or you will get overflow on 32 bit */
 	p = tfrc_calc_x_reverse_lookup(fval);
@@ -882,31 +900,101 @@ found:
 static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+	struct dccp_li_hist_entry *next, *head;
+	u64 seq_temp;
 
-	if (seq_loss != DCCP_MAX_SEQNO + 1 &&
-	    list_empty(&hcrx->ccid3hcrx_li_hist)) {
-		struct dccp_li_hist_entry *li_tail;
+	if (list_empty(&hcrx->ccid3hcrx_li_hist)) {
+		if (!dccp_li_hist_interval_new(ccid3_li_hist,
+		   &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss))
+			return;
 
-		li_tail = dccp_li_hist_interval_new(ccid3_li_hist,
-						    &hcrx->ccid3hcrx_li_hist,
-						    seq_loss, win_loss);
-		if (li_tail == NULL)
+		next = (struct dccp_li_hist_entry *)
+		   hcrx->ccid3hcrx_li_hist.next;
+		next->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
+	} else {
+		struct dccp_li_hist_entry *entry;
+		struct list_head *tail;
+
+		head = (struct dccp_li_hist_entry *)
+		   hcrx->ccid3hcrx_li_hist.next;
+		/* FIXME win count check removed as was wrong */
+		/* should make this check with receive history */
+		/* and compare there as per section 10.2 of RFC4342 */
+
+		/* new loss event detected */
+		/* calculate last interval length */
+		seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
+		entry = dccp_li_hist_entry_new(ccid3_li_hist, SLAB_ATOMIC);
+
+		if (entry == NULL) {
+			printk(KERN_CRIT "%s: out of memory\n",__FUNCTION__);
+			dump_stack();
 			return;
-		li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
-	} else
-		    LIMIT_NETDEBUG(KERN_WARNING "%s: FIXME: find end of "
-				   "interval\n", __FUNCTION__);
+		}
+
+		list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist);
+
+		tail = hcrx->ccid3hcrx_li_hist.prev;
+		list_del(tail);
+		kmem_cache_free(ccid3_li_hist->dccplih_slab, tail);
+
+		/* Create the newest interval */
+		entry->dccplih_seqno = seq_loss;
+		entry->dccplih_interval = seq_temp;
+		entry->dccplih_win_count = win_loss;
+	}
 }
 
-static void ccid3_hc_rx_detect_loss(struct sock *sk)
+static int ccid3_hc_rx_detect_loss(struct sock *sk,
+                                    struct dccp_rx_hist_entry *packet)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	u8 win_loss;
-	const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist,
-						      &hcrx->ccid3hcrx_li_hist,
-						      &win_loss);
+	struct dccp_rx_hist_entry *rx_hist = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist);
+	u64 seqno = packet->dccphrx_seqno;
+	u64 tmp_seqno;
+	int loss = 0;
+	u8 ccval;
+
+
+	tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
+
+	if (!rx_hist ||
+	   follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
+		hcrx->ccid3hcrx_seqno_nonloss = seqno;
+		hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
+		goto detect_out;
+	}
+
 
-	ccid3_hc_rx_update_li(sk, seq_loss, win_loss);
+	while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
+	   > TFRC_RECV_NUM_LATE_LOSS) {
+		loss = 1;
+		ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss,
+		   hcrx->ccid3hcrx_ccval_nonloss);
+		tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
+		dccp_inc_seqno(&tmp_seqno);
+		hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
+		dccp_inc_seqno(&tmp_seqno);
+		while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
+		   tmp_seqno, &ccval)) {
+		   	hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
+			hcrx->ccid3hcrx_ccval_nonloss = ccval;
+			dccp_inc_seqno(&tmp_seqno);
+		}
+	}
+
+	/* FIXME - this code could be simplified with above while */
+	/* but works at moment */
+	if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
+		hcrx->ccid3hcrx_seqno_nonloss = seqno;
+		hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
+	}
+
+detect_out:
+	dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
+		   &hcrx->ccid3hcrx_li_hist, packet,
+		   hcrx->ccid3hcrx_seqno_nonloss);
+	return loss;
 }
 
 static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -916,8 +1004,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	struct dccp_rx_hist_entry *packet;
 	struct timeval now;
 	u8 win_count;
-	u32 p_prev, r_sample, t_elapsed;
-	int ins;
+	u32 p_prev, rtt_prev, r_sample, t_elapsed;
+	int loss;
 
 	BUG_ON(hcrx == NULL ||
 	       !(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
@@ -932,7 +1020,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	case DCCP_PKT_DATAACK:
 		if (opt_recv->dccpor_timestamp_echo == 0)
 			break;
-		p_prev = hcrx->ccid3hcrx_rtt;
+		rtt_prev = hcrx->ccid3hcrx_rtt;
 		dccp_timestamp(sk, &now);
 		timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10);
 		r_sample = timeval_usecs(&now);
@@ -951,8 +1039,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
 					      r_sample / 10;
 
-		if (p_prev != hcrx->ccid3hcrx_rtt)
-			ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n",
+		if (rtt_prev != hcrx->ccid3hcrx_rtt)
+			ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n",
 				       dccp_role(sk), hcrx->ccid3hcrx_rtt,
 				       opt_recv->dccpor_elapsed_time);
 		break;
@@ -973,8 +1061,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 
 	win_count = packet->dccphrx_ccval;
 
-	ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
-				      &hcrx->ccid3hcrx_li_hist, packet);
+	loss = ccid3_hc_rx_detect_loss(sk, packet);
 
 	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
 		return;
@@ -991,7 +1078,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	case TFRC_RSTATE_DATA:
 		hcrx->ccid3hcrx_bytes_recv += skb->len -
 					      dccp_hdr(skb)->dccph_doff * 4;
-		if (ins != 0)
+		if (loss)
 			break;
 
 		dccp_timestamp(sk, &now);
@@ -1012,7 +1099,6 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n",
 		       dccp_role(sk), sk, dccp_state_name(sk->sk_state));
 
-	ccid3_hc_rx_detect_loss(sk);
 	p_prev = hcrx->ccid3hcrx_p;
 	
 	/* Calculate loss event rate */
@@ -1022,6 +1108,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		/* Scaling up by 1000000 as fixed decimal */
 		if (i_mean != 0)
 			hcrx->ccid3hcrx_p = 1000000 / i_mean;
+	} else {
+		printk(KERN_CRIT "%s: empty loss hist\n",__FUNCTION__);
+		dump_stack();
 	}
 
 	if (hcrx->ccid3hcrx_p > p_prev) {
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 22cb9f8..0a2cb75 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -120,9 +120,10 @@ struct ccid3_hc_rx_sock {
 #define ccid3hcrx_x_recv	ccid3hcrx_tfrc.tfrcrx_x_recv
 #define ccid3hcrx_rtt		ccid3hcrx_tfrc.tfrcrx_rtt
 #define ccid3hcrx_p		ccid3hcrx_tfrc.tfrcrx_p
-  	u64			ccid3hcrx_seqno_last_counter:48,
+  	u64			ccid3hcrx_seqno_nonloss:48,
+				ccid3hcrx_ccval_nonloss:4,
 				ccid3hcrx_state:8,
-				ccid3hcrx_last_counter:4;
+				ccid3hcrx_ccval_last_counter:4;
   	u32			ccid3hcrx_bytes_recv;
   	struct timeval		ccid3hcrx_tstamp_last_feedback;
   	struct timeval		ccid3hcrx_tstamp_last_ack;
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index b93d9fc..906c81a 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/module.h>
+#include <net/sock.h>
 
 #include "loss_interval.h"
 
@@ -90,13 +91,13 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
 	u32 w_tot  = 0;
 
 	list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
-		if (i < DCCP_LI_HIST_IVAL_F_LENGTH) {
+		if (li_entry->dccplih_interval != ~0) {
 			i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
 			w_tot  += dccp_li_hist_w[i];
+			if (i != 0)
+				i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
 		}
 
-		if (i != 0)
-			i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
 
 		if (++i > DCCP_LI_HIST_IVAL_F_LENGTH)
 			break;
@@ -107,37 +108,36 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
 
 	i_tot = max(i_tot0, i_tot1);
 
-	/* FIXME: Why do we do this? -Ian McDonald */
-	if (i_tot * 4 < w_tot)
-		i_tot = w_tot * 4;
+	if (!w_tot) {
+		LIMIT_NETDEBUG(KERN_WARNING "%s: w_tot = 0\n", __FUNCTION__);
+		return 1;
+	}
 
-	return i_tot * 4 / w_tot;
+	return i_tot / w_tot;
 }
 
 EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
 
-struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist,
-						     struct list_head *list,
-						     const u64 seq_loss,
-						     const u8 win_loss)
+int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+   struct list_head *list, const u64 seq_loss, const u8 win_loss)
 {
-	struct dccp_li_hist_entry *tail = NULL, *entry;
+	struct dccp_li_hist_entry *entry;
 	int i;
 
-	for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) {
+	for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) {
 		entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC);
 		if (entry == NULL) {
 			dccp_li_hist_purge(hist, list);
-			return NULL;
+			dump_stack();
+			return 0;
 		}
-		if (tail == NULL)
-			tail = entry;
+		entry->dccplih_interval = ~0;
 		list_add(&entry->dccplih_node, list);
 	}
 
 	entry->dccplih_seqno     = seq_loss;
 	entry->dccplih_win_count = win_loss;
-	return tail;
+	return 1;
 }
 
 EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new);
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index dcb370a..0ae85f0 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -52,9 +52,6 @@ extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
 
 extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
 
-extern struct dccp_li_hist_entry *
-			dccp_li_hist_interval_new(struct dccp_li_hist *hist,
-						  struct list_head *list,
-						  const u64 seq_loss,
-						  const u8 win_loss);
+extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+   struct list_head *list, const u64 seq_loss, const u8 win_loss);
 #endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 420c60f..b876c9c 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -112,64 +112,27 @@ struct dccp_rx_hist_entry *
 
 EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet);
 
-int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
+void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
 			    struct list_head *rx_list,
 			    struct list_head *li_list,
-			    struct dccp_rx_hist_entry *packet)
+			    struct dccp_rx_hist_entry *packet,
+			    u64 nonloss_seqno)
 {
-	struct dccp_rx_hist_entry *entry, *next, *iter;
+	struct dccp_rx_hist_entry *entry, *next;
 	u8 num_later = 0;
 
-	iter = dccp_rx_hist_head(rx_list);
-	if (iter == NULL)
-		dccp_rx_hist_add_entry(rx_list, packet);
-	else {
-		const u64 seqno = packet->dccphrx_seqno;
-
-		if (after48(seqno, iter->dccphrx_seqno))
-			dccp_rx_hist_add_entry(rx_list, packet);
-		else {
-			if (dccp_rx_hist_entry_data_packet(iter))
-				num_later = 1;
-
-			list_for_each_entry_continue(iter, rx_list,
-						     dccphrx_node) {
-				if (after48(seqno, iter->dccphrx_seqno)) {
-					dccp_rx_hist_add_entry(&iter->dccphrx_node,
-							       packet);
-					goto trim_history;
-				}
-
-				if (dccp_rx_hist_entry_data_packet(iter))
-					num_later++;
-
-				if (num_later == TFRC_RECV_NUM_LATE_LOSS) {
-					dccp_rx_hist_entry_delete(hist, packet);
-					return 1;
-				}
-			}
-
-			if (num_later < TFRC_RECV_NUM_LATE_LOSS)
-				dccp_rx_hist_add_entry(rx_list, packet);
-			/*
-			 * FIXME: else what? should we destroy the packet
-			 * like above?
-			 */
-		}
-	}
+	list_add(&packet->dccphrx_node, rx_list);
 
-trim_history:
-	/*
-	 * Trim history (remove all packets after the NUM_LATE_LOSS + 1
-	 * data packets)
-	 */
 	num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
 
 	if (!list_empty(li_list)) {
 		list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
 			if (num_later == 0) {
-				list_del_init(&entry->dccphrx_node);
-				dccp_rx_hist_entry_delete(hist, entry);
+				if (after48(nonloss_seqno,
+				   entry->dccphrx_seqno)) {
+					list_del_init(&entry->dccphrx_node);
+					dccp_rx_hist_entry_delete(hist, entry);
+				}
 			} else if (dccp_rx_hist_entry_data_packet(entry))
 				--num_later;
 		}
@@ -217,94 +180,10 @@ trim_history:
 				--num_later;
 		}
 	}
-
-	return 0;
 }
 
 EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet);
 
-u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
-			     struct list_head *li_list, u8 *win_loss)
-{
-	struct dccp_rx_hist_entry *entry, *next, *packet;
-	struct dccp_rx_hist_entry *a_loss = NULL;
-	struct dccp_rx_hist_entry *b_loss = NULL;
-	u64 seq_loss = DCCP_MAX_SEQNO + 1;
-	u8 num_later = TFRC_RECV_NUM_LATE_LOSS;
-
-	list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
-		if (num_later == 0) {
-			b_loss = entry;
-			break;
-		} else if (dccp_rx_hist_entry_data_packet(entry))
-			--num_later;
-	}
-
-	if (b_loss == NULL)
-		goto out;
-
-	num_later = 1;
-	list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
-		if (num_later == 0) {
-			a_loss = entry;
-			break;
-		} else if (dccp_rx_hist_entry_data_packet(entry))
-			--num_later;
-	}
-
-	if (a_loss == NULL) {
-		if (list_empty(li_list)) {
-			/* no loss event have occured yet */
-			LIMIT_NETDEBUG("%s: TODO: find a lost data packet by "
-				       "comparing to initial seqno\n",
-				       __FUNCTION__);
-			goto out;
-		} else {
-			LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!",
-				       __FUNCTION__);
-			goto out;
-		}
-	}
-
-	/* Locate a lost data packet */
-	entry = packet = b_loss;
-	list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
-		u64 delta = dccp_delta_seqno(entry->dccphrx_seqno,
-					     packet->dccphrx_seqno);
-
-		if (delta != 0) {
-			if (dccp_rx_hist_entry_data_packet(packet))
-				--delta;
-			/*
-			 * FIXME: check this, probably this % usage is because
-			 * in earlier drafts the ndp count was just 8 bits
-			 * long, but now it cam be up to 24 bits long.
-			 */
-#if 0
-			if (delta % DCCP_NDP_LIMIT !=
-			    (packet->dccphrx_ndp -
-			     entry->dccphrx_ndp) % DCCP_NDP_LIMIT)
-#endif
-			if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) {
-				seq_loss = entry->dccphrx_seqno;
-				dccp_inc_seqno(&seq_loss);
-			}
-		}
-		packet = entry;
-		if (packet == a_loss)
-			break;
-	}
-out:
-	if (seq_loss != DCCP_MAX_SEQNO + 1)
-		*win_loss = a_loss->dccphrx_ccval;
-	else
-		*win_loss = 0; /* Paranoia */
-
-	return seq_loss;
-}
-
-EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss);
-
 struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
 {
 	struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index aea9c5d..067cf1c 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -166,12 +166,6 @@ static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
 extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
 			       struct list_head *list);
 
-static inline void dccp_rx_hist_add_entry(struct list_head *list,
-					  struct dccp_rx_hist_entry *entry)
-{
-	list_add(&entry->dccphrx_node, list);
-}
-
 static inline struct dccp_rx_hist_entry *
 		dccp_rx_hist_head(struct list_head *list)
 {
@@ -190,10 +184,11 @@ static inline int
 	       entry->dccphrx_type == DCCP_PKT_DATAACK;
 }
 
-extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
+extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
 				   struct list_head *rx_list,
 				   struct list_head *li_list,
-				   struct dccp_rx_hist_entry *packet);
+				   struct dccp_rx_hist_entry *packet,
+				   u64 nonloss_seqno);
 
 extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
 				    struct list_head *li_list, u8 *win_loss);
-- 
cgit v0.10.2


From 9a654518e1b774b8e8f74a819fd12a931e7672c9 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Sun, 27 Aug 2006 01:23:22 -0700
Subject: [PATCH] drivers/rtc: fix rtc-s3c.c

In the cleanups of drivers/rtc/rtc-s3c.c, the base address for the
registers got broken.  This patch fixes that by ensuring the readb/writeb
are all prefixed with the base returned from ioremap()ing the registers.

Also fix check for valid year range, which was the wrong way around.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index d6d1bff..aacbfea 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -69,12 +69,12 @@ static void s3c_rtc_setaie(int to)
 
 	pr_debug("%s: aie=%d\n", __FUNCTION__, to);
 
-	tmp = readb(S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN;
+	tmp = readb(s3c_rtc_base + S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN;
 
 	if (to)
 		tmp |= S3C2410_RTCALM_ALMEN;
 
-	writeb(tmp, S3C2410_RTCALM);
+	writeb(tmp, s3c_rtc_base + S3C2410_RTCALM);
 }
 
 static void s3c_rtc_setpie(int to)
@@ -84,12 +84,12 @@ static void s3c_rtc_setpie(int to)
 	pr_debug("%s: pie=%d\n", __FUNCTION__, to);
 
 	spin_lock_irq(&s3c_rtc_pie_lock);
-	tmp = readb(S3C2410_TICNT) & ~S3C2410_TICNT_ENABLE;
+	tmp = readb(s3c_rtc_base + S3C2410_TICNT) & ~S3C2410_TICNT_ENABLE;
 
 	if (to)
 		tmp |= S3C2410_TICNT_ENABLE;
 
-	writeb(tmp, S3C2410_TICNT);
+	writeb(tmp, s3c_rtc_base + S3C2410_TICNT);
 	spin_unlock_irq(&s3c_rtc_pie_lock);
 }
 
@@ -98,13 +98,13 @@ static void s3c_rtc_setfreq(int freq)
 	unsigned int tmp;
 
 	spin_lock_irq(&s3c_rtc_pie_lock);
-	tmp = readb(S3C2410_TICNT) & S3C2410_TICNT_ENABLE;
+	tmp = readb(s3c_rtc_base + S3C2410_TICNT) & S3C2410_TICNT_ENABLE;
 
 	s3c_rtc_freq = freq;
 
 	tmp |= (128 / freq)-1;
 
-	writeb(tmp, S3C2410_TICNT);
+	writeb(tmp, s3c_rtc_base + S3C2410_TICNT);
 	spin_unlock_irq(&s3c_rtc_pie_lock);
 }
 
@@ -113,14 +113,15 @@ static void s3c_rtc_setfreq(int freq)
 static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 {
 	unsigned int have_retried = 0;
+	void __iomem *base = s3c_rtc_base;
 
  retry_get_time:
-	rtc_tm->tm_min  = readb(S3C2410_RTCMIN);
-	rtc_tm->tm_hour = readb(S3C2410_RTCHOUR);
-	rtc_tm->tm_mday = readb(S3C2410_RTCDATE);
-	rtc_tm->tm_mon  = readb(S3C2410_RTCMON);
-	rtc_tm->tm_year = readb(S3C2410_RTCYEAR);
-	rtc_tm->tm_sec  = readb(S3C2410_RTCSEC);
+	rtc_tm->tm_min  = readb(base + S3C2410_RTCMIN);
+	rtc_tm->tm_hour = readb(base + S3C2410_RTCHOUR);
+	rtc_tm->tm_mday = readb(base + S3C2410_RTCDATE);
+	rtc_tm->tm_mon  = readb(base + S3C2410_RTCMON);
+	rtc_tm->tm_year = readb(base + S3C2410_RTCYEAR);
+	rtc_tm->tm_sec  = readb(base + S3C2410_RTCSEC);
 
 	/* the only way to work out wether the system was mid-update
 	 * when we read it is to check the second counter, and if it
@@ -151,17 +152,25 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 
 static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
 {
+	void __iomem *base = s3c_rtc_base;
+
 	/* the rtc gets round the y2k problem by just not supporting it */
 
-	if (tm->tm_year < 100)
+	if (tm->tm_year < 100 || tm->tm_year > 199) {	/* 2000..2099 only */
+		dev_err(dev, "rtc only supports 100 years\n");
 		return -EINVAL;
+	}
+
+	pr_debug("set time %02d.%02d.%02d %02d/%02d/%02d\n",
+		 tm->tm_year, tm->tm_mon, tm->tm_mday,
+		 tm->tm_hour, tm->tm_min, tm->tm_sec);
 
-	writeb(BIN2BCD(tm->tm_sec),  S3C2410_RTCSEC);
-	writeb(BIN2BCD(tm->tm_min),  S3C2410_RTCMIN);
-	writeb(BIN2BCD(tm->tm_hour), S3C2410_RTCHOUR);
-	writeb(BIN2BCD(tm->tm_mday), S3C2410_RTCDATE);
-	writeb(BIN2BCD(tm->tm_mon + 1), S3C2410_RTCMON);
-	writeb(BIN2BCD(tm->tm_year - 100), S3C2410_RTCYEAR);
+	writeb(BIN2BCD(tm->tm_sec),  base + S3C2410_RTCSEC);
+	writeb(BIN2BCD(tm->tm_min),  base + S3C2410_RTCMIN);
+	writeb(BIN2BCD(tm->tm_hour), base + S3C2410_RTCHOUR);
+	writeb(BIN2BCD(tm->tm_mday), base + S3C2410_RTCDATE);
+	writeb(BIN2BCD(tm->tm_mon + 1), base + S3C2410_RTCMON);
+	writeb(BIN2BCD(tm->tm_year - 100), base + S3C2410_RTCYEAR);
 
 	return 0;
 }
@@ -169,16 +178,17 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
 static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
 	struct rtc_time *alm_tm = &alrm->time;
+	void __iomem *base = s3c_rtc_base;
 	unsigned int alm_en;
 
-	alm_tm->tm_sec  = readb(S3C2410_ALMSEC);
-	alm_tm->tm_min  = readb(S3C2410_ALMMIN);
-	alm_tm->tm_hour = readb(S3C2410_ALMHOUR);
-	alm_tm->tm_mon  = readb(S3C2410_ALMMON);
-	alm_tm->tm_mday = readb(S3C2410_ALMDATE);
-	alm_tm->tm_year = readb(S3C2410_ALMYEAR);
+	alm_tm->tm_sec  = readb(base + S3C2410_ALMSEC);
+	alm_tm->tm_min  = readb(base + S3C2410_ALMMIN);
+	alm_tm->tm_hour = readb(base + S3C2410_ALMHOUR);
+	alm_tm->tm_mon  = readb(base + S3C2410_ALMMON);
+	alm_tm->tm_mday = readb(base + S3C2410_ALMDATE);
+	alm_tm->tm_year = readb(base + S3C2410_ALMYEAR);
 
-	alm_en = readb(S3C2410_RTCALM);
+	alm_en = readb(base + S3C2410_RTCALM);
 
 	pr_debug("read alarm %02x %02x.%02x.%02x %02x/%02x/%02x\n",
 		 alm_en,
@@ -226,6 +236,7 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
 static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
 	struct rtc_time *tm = &alrm->time;
+	void __iomem *base = s3c_rtc_base;
 	unsigned int alrm_en;
 
 	pr_debug("s3c_rtc_setalarm: %d, %02x/%02x/%02x %02x.%02x.%02x\n",
@@ -234,32 +245,32 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 		 tm->tm_hour & 0xff, tm->tm_min & 0xff, tm->tm_sec);
 
 
-	alrm_en = readb(S3C2410_RTCALM) & S3C2410_RTCALM_ALMEN;
-	writeb(0x00, S3C2410_RTCALM);
+	alrm_en = readb(base + S3C2410_RTCALM) & S3C2410_RTCALM_ALMEN;
+	writeb(0x00, base + S3C2410_RTCALM);
 
 	if (tm->tm_sec < 60 && tm->tm_sec >= 0) {
 		alrm_en |= S3C2410_RTCALM_SECEN;
-		writeb(BIN2BCD(tm->tm_sec), S3C2410_ALMSEC);
+		writeb(BIN2BCD(tm->tm_sec), base + S3C2410_ALMSEC);
 	}
 
 	if (tm->tm_min < 60 && tm->tm_min >= 0) {
 		alrm_en |= S3C2410_RTCALM_MINEN;
-		writeb(BIN2BCD(tm->tm_min), S3C2410_ALMMIN);
+		writeb(BIN2BCD(tm->tm_min), base + S3C2410_ALMMIN);
 	}
 
 	if (tm->tm_hour < 24 && tm->tm_hour >= 0) {
 		alrm_en |= S3C2410_RTCALM_HOUREN;
-		writeb(BIN2BCD(tm->tm_hour), S3C2410_ALMHOUR);
+		writeb(BIN2BCD(tm->tm_hour), base + S3C2410_ALMHOUR);
 	}
 
 	pr_debug("setting S3C2410_RTCALM to %08x\n", alrm_en);
 
-	writeb(alrm_en, S3C2410_RTCALM);
+	writeb(alrm_en, base + S3C2410_RTCALM);
 
 	if (0) {
-		alrm_en = readb(S3C2410_RTCALM);
+		alrm_en = readb(base + S3C2410_RTCALM);
 		alrm_en &= ~S3C2410_RTCALM_ALMEN;
-		writeb(alrm_en, S3C2410_RTCALM);
+		writeb(alrm_en, base + S3C2410_RTCALM);
 		disable_irq_wake(s3c_rtc_alarmno);
 	}
 
@@ -319,8 +330,8 @@ static int s3c_rtc_ioctl(struct device *dev,
 
 static int s3c_rtc_proc(struct device *dev, struct seq_file *seq)
 {
-	unsigned int rtcalm = readb(S3C2410_RTCALM);
-	unsigned int ticnt = readb (S3C2410_TICNT);
+	unsigned int rtcalm = readb(s3c_rtc_base + S3C2410_RTCALM);
+	unsigned int ticnt = readb(s3c_rtc_base + S3C2410_TICNT);
 
 	seq_printf(seq, "alarm_IRQ\t: %s\n",
 		   (rtcalm & S3C2410_RTCALM_ALMEN) ? "yes" : "no" );
@@ -387,39 +398,40 @@ static struct rtc_class_ops s3c_rtcops = {
 
 static void s3c_rtc_enable(struct platform_device *pdev, int en)
 {
+	void __iomem *base = s3c_rtc_base;
 	unsigned int tmp;
 
 	if (s3c_rtc_base == NULL)
 		return;
 
 	if (!en) {
-		tmp = readb(S3C2410_RTCCON);
-		writeb(tmp & ~S3C2410_RTCCON_RTCEN, S3C2410_RTCCON);
+		tmp = readb(base + S3C2410_RTCCON);
+		writeb(tmp & ~S3C2410_RTCCON_RTCEN, base + S3C2410_RTCCON);
 
-		tmp = readb(S3C2410_TICNT);
-		writeb(tmp & ~S3C2410_TICNT_ENABLE, S3C2410_TICNT);
+		tmp = readb(base + S3C2410_TICNT);
+		writeb(tmp & ~S3C2410_TICNT_ENABLE, base + S3C2410_TICNT);
 	} else {
 		/* re-enable the device, and check it is ok */
 
-		if ((readb(S3C2410_RTCCON) & S3C2410_RTCCON_RTCEN) == 0){
+		if ((readb(base+S3C2410_RTCCON) & S3C2410_RTCCON_RTCEN) == 0){
 			dev_info(&pdev->dev, "rtc disabled, re-enabling\n");
 
-			tmp = readb(S3C2410_RTCCON);
-			writeb(tmp | S3C2410_RTCCON_RTCEN , S3C2410_RTCCON);
+			tmp = readb(base + S3C2410_RTCCON);
+			writeb(tmp|S3C2410_RTCCON_RTCEN, base+S3C2410_RTCCON);
 		}
 
-		if ((readb(S3C2410_RTCCON) & S3C2410_RTCCON_CNTSEL)){
+		if ((readb(base + S3C2410_RTCCON) & S3C2410_RTCCON_CNTSEL)){
 			dev_info(&pdev->dev, "removing RTCCON_CNTSEL\n");
 
-			tmp = readb(S3C2410_RTCCON);
-			writeb(tmp& ~S3C2410_RTCCON_CNTSEL , S3C2410_RTCCON);
+			tmp = readb(base + S3C2410_RTCCON);
+			writeb(tmp& ~S3C2410_RTCCON_CNTSEL, base+S3C2410_RTCCON);
 		}
 
-		if ((readb(S3C2410_RTCCON) & S3C2410_RTCCON_CLKRST)){
+		if ((readb(base + S3C2410_RTCCON) & S3C2410_RTCCON_CLKRST)){
 			dev_info(&pdev->dev, "removing RTCCON_CLKRST\n");
 
-			tmp = readb(S3C2410_RTCCON);
-			writeb(tmp & ~S3C2410_RTCCON_CLKRST, S3C2410_RTCCON);
+			tmp = readb(base + S3C2410_RTCCON);
+			writeb(tmp & ~S3C2410_RTCCON_CLKRST, base+S3C2410_RTCCON);
 		}
 	}
 }
@@ -475,8 +487,8 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 	}
 
 	s3c_rtc_mem = request_mem_region(res->start,
-					     res->end-res->start+1,
-					     pdev->name);
+					 res->end-res->start+1,
+					 pdev->name);
 
 	if (s3c_rtc_mem == NULL) {
 		dev_err(&pdev->dev, "failed to reserve memory region\n");
@@ -495,7 +507,8 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 
 	s3c_rtc_enable(pdev, 1);
 
- 	pr_debug("s3c2410_rtc: RTCCON=%02x\n", readb(S3C2410_RTCCON));
+ 	pr_debug("s3c2410_rtc: RTCCON=%02x\n",
+		 readb(s3c_rtc_base + S3C2410_RTCCON));
 
 	s3c_rtc_setfreq(s3c_rtc_freq);
 
@@ -543,7 +556,7 @@ static int s3c_rtc_suspend(struct platform_device *pdev, pm_message_t state)
 
 	/* save TICNT for anyone using periodic interrupts */
 
-	ticnt_save = readb(S3C2410_TICNT);
+	ticnt_save = readb(s3c_rtc_base + S3C2410_TICNT);
 
 	/* calculate time delta for suspend */
 
@@ -567,7 +580,7 @@ static int s3c_rtc_resume(struct platform_device *pdev)
 	rtc_tm_to_time(&tm, &time.tv_sec);
 	restore_time_delta(&s3c_rtc_delta, &time);
 
-	writeb(ticnt_save, S3C2410_TICNT);
+	writeb(ticnt_save, s3c_rtc_base + S3C2410_TICNT);
 	return 0;
 }
 #else
diff --git a/include/asm-arm/arch-s3c2410/regs-rtc.h b/include/asm-arm/arch-s3c2410/regs-rtc.h
index 228983f..0fbec07 100644
--- a/include/asm-arm/arch-s3c2410/regs-rtc.h
+++ b/include/asm-arm/arch-s3c2410/regs-rtc.h
@@ -18,7 +18,7 @@
 #ifndef __ASM_ARCH_REGS_RTC_H
 #define __ASM_ARCH_REGS_RTC_H __FILE__
 
-#define S3C2410_RTCREG(x) ((x) + S3C24XX_VA_RTC)
+#define S3C2410_RTCREG(x) (x)
 
 #define S3C2410_RTCCON	      S3C2410_RTCREG(0x40)
 #define S3C2410_RTCCON_RTCEN  (1<<0)
-- 
cgit v0.10.2


From c06aad854fdb9da38fcc22dccfe9d72919453e43 Mon Sep 17 00:00:00 2001
From: Daniel Kobras <kobras@linux.de>
Date: Sun, 27 Aug 2006 01:23:24 -0700
Subject: [PATCH] dm: Fix deadlock under high i/o load in raid1 setup.

On an nForce4-equipped machine with two SATA disks in raid1 setup using dmraid,
we experienced frequent deadlock of the system under high i/o load.  'cat
/dev/zero > ~/zero' was the most reliable way to reproduce them: Randomly
after a few GB, 'cp' would be left in 'D' state along with kjournald and
kmirrord.  The functions cp and kjournald were blocked in did vary, but
kmirrord's wchan always pointed to 'mempool_alloc()'.  We've seen this pattern
on 2.6.15 and 2.6.17 kernels.  http://lkml.org/lkml/2005/4/20/142 indicates
that this problem has been around even before.

So much for the facts, here's my interpretation: mempool_alloc() first tries
to atomically allocate the requested memory, or falls back to hand out
preallocated chunks from the mempool.  If both fail, it puts the calling
process (kmirrord in this case) on a private waitqueue until somebody refills
the pool.  Where the only 'somebody' is kmirrord itself, so we have a
deadlock.

I worked around this problem by falling back to a (blocking) kmalloc where
kmirrord would previously have ended up on the waitqueue.  This defeats part of
the benefits of using the mempool, but at least keeps the system running.  And
it could be done with a two-line change.  Note that mempool_alloc() clears the
GFP_NOIO flag internally, and only uses it to decide whether to wait or return
an error if immediate allocation fails, so the attached patch doesn't change
behaviour in the non-deadlocking case.  Patch is against current git
(2.6.18-rc4), but should apply to earlier versions as well.  I've tested on
2.6.15, where this patch makes the difference between random lockup and a
stable system.

Signed-off-by: Daniel Kobras <kobras@linux.de>
Acked-by: Alasdair G Kergon <agk@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index be48ced..c54de98 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -255,7 +255,9 @@ static struct region *__rh_alloc(struct region_hash *rh, region_t region)
 	struct region *reg, *nreg;
 
 	read_unlock(&rh->hash_lock);
-	nreg = mempool_alloc(rh->region_pool, GFP_NOIO);
+	nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
+	if (unlikely(!nreg))
+		nreg = kmalloc(sizeof(struct region), GFP_NOIO);
 	nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
 		RH_CLEAN : RH_NOSYNC;
 	nreg->rh = rh;
-- 
cgit v0.10.2


From b6b5bce3571e496504a89ee575d32101e0a98b93 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 27 Aug 2006 01:23:25 -0700
Subject: [PATCH] swsusp: Fix swap_type_of

There is a bug in mm/swapfile.c#swap_type_of() that makes swsusp only be
able to use the first active swap partition as the resume device.  Fix it.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Hugh Dickins <hugh@veritas.com>
Acked-by: Pavel Machek <pavel@suse.cz>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/mm/swapfile.c b/mm/swapfile.c
index e70d6c6..f1f5ec7 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -442,11 +442,12 @@ int swap_type_of(dev_t device)
 
 		if (!(swap_info[i].flags & SWP_WRITEOK))
 			continue;
+
 		if (!device) {
 			spin_unlock(&swap_lock);
 			return i;
 		}
-		inode = swap_info->swap_file->f_dentry->d_inode;
+		inode = swap_info[i].swap_file->f_dentry->d_inode;
 		if (S_ISBLK(inode->i_mode) &&
 		    device == MKDEV(imajor(inode), iminor(inode))) {
 			spin_unlock(&swap_lock);
-- 
cgit v0.10.2


From 641741e010b615bae417c876a21d17dbd616241f Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Sun, 27 Aug 2006 01:23:27 -0700
Subject: [PATCH] rtc-s3c.c: fix time setting checks

Fix the year check on setting the time with the S3C24XX RTC driver.  Also
move the debug to before the set to see what is going on if it does fail.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index aacbfea..2c7de79 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -153,24 +153,25 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
 {
 	void __iomem *base = s3c_rtc_base;
+	int year = tm->tm_year - 100;
 
-	/* the rtc gets round the y2k problem by just not supporting it */
+	pr_debug("set time %02d.%02d.%02d %02d/%02d/%02d\n",
+		 tm->tm_year, tm->tm_mon, tm->tm_mday,
+		 tm->tm_hour, tm->tm_min, tm->tm_sec);
+
+	/* we get around y2k by simply not supporting it */
 
-	if (tm->tm_year > 100) {
+	if (year < 0 || year >= 100) {
 		dev_err(dev, "rtc only supports 100 years\n");
 		return -EINVAL;
 	}
 
-	pr_debug("set time %02d.%02d.%02d %02d/%02d/%02d\n",
-		 tm->tm_year, tm->tm_mon, tm->tm_mday,
-		 tm->tm_hour, tm->tm_min, tm->tm_sec);
-
 	writeb(BIN2BCD(tm->tm_sec),  base + S3C2410_RTCSEC);
 	writeb(BIN2BCD(tm->tm_min),  base + S3C2410_RTCMIN);
 	writeb(BIN2BCD(tm->tm_hour), base + S3C2410_RTCHOUR);
 	writeb(BIN2BCD(tm->tm_mday), base + S3C2410_RTCDATE);
 	writeb(BIN2BCD(tm->tm_mon + 1), base + S3C2410_RTCMON);
-	writeb(BIN2BCD(tm->tm_year - 100), base + S3C2410_RTCYEAR);
+	writeb(BIN2BCD(year), base + S3C2410_RTCYEAR);
 
 	return 0;
 }
-- 
cgit v0.10.2


From cc36e7f124da139a819ad316c39d9dbcb5ba1897 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eike-kernel@sf-tec.de>
Date: Sun, 27 Aug 2006 01:23:27 -0700
Subject: [PATCH] tty: remove bogus call to cdev_del()

When cdev_add() failed there is no reason to call cdev_del().

Signed-off-by: Rolf Eike Beer <eike-kernel@sf-tec.de>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index bfdb902..d6e4eaa 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -3094,7 +3094,6 @@ int tty_register_driver(struct tty_driver *driver)
 	driver->cdev.owner = driver->owner;
 	error = cdev_add(&driver->cdev, dev, driver->num);
 	if (error) {
-		cdev_del(&driver->cdev);
 		unregister_chrdev_region(dev, driver->num);
 		driver->ttys = NULL;
 		driver->termios = driver->termios_locked = NULL;
-- 
cgit v0.10.2


From a2e0b56316fa90e137802fdad6a7c6a9b85c86c3 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 27 Aug 2006 01:23:28 -0700
Subject: [PATCH] Fix docs for fs.suid_dumpable

Sergey Vlasov noticed that there is no kernel.suid_dumpable, only
fs.suid_dumpable.

How KERN_SETUID_DUMPABLE ended up in fs_table[]? Hell knows...

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 0b62c62..5c3a519 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -25,6 +25,7 @@ Currently, these files are in /proc/sys/fs:
 - inode-state
 - overflowuid
 - overflowgid
+- suid_dumpable
 - super-max
 - super-nr
 
@@ -131,6 +132,25 @@ The default is 65534.
 
 ==============================================================
 
+suid_dumpable:
+
+This value can be used to query and set the core dump mode for setuid
+or otherwise protected/tainted binaries. The modes are
+
+0 - (default) - traditional behaviour. Any process which has changed
+	privilege levels or is execute only will not be dumped
+1 - (debug) - all processes dump core when possible. The core dump is
+	owned by the current user and no security is applied. This is
+	intended for system debugging situations only. Ptrace is unchecked.
+2 - (suidsafe) - any binary which normally would not be dumped is dumped
+	readable by root only. This allows the end user to remove
+	such a dump but not access it directly. For security reasons
+	core dumps in this mode will not overwrite one another or
+	other files. This mode is appropriate when adminstrators are
+	attempting to debug problems in a normal environment.
+
+==============================================================
+
 super-max & super-nr:
 
 These numbers control the maximum number of superblocks, and
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 7345c33..89bf8c2 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -50,7 +50,6 @@ show up in /proc/sys/kernel:
 - shmmax                      [ sysv ipc ]
 - shmmni
 - stop-a                      [ SPARC only ]
-- suid_dumpable
 - sysrq                       ==> Documentation/sysrq.txt
 - tainted
 - threads-max
@@ -310,25 +309,6 @@ kernel.  This value defaults to SHMMAX.
 
 ==============================================================
 
-suid_dumpable:
-
-This value can be used to query and set the core dump mode for setuid
-or otherwise protected/tainted binaries. The modes are
-
-0 - (default) - traditional behaviour. Any process which has changed
-	privilege levels or is execute only will not be dumped
-1 - (debug) - all processes dump core when possible. The core dump is
-	owned by the current user and no security is applied. This is
-	intended for system debugging situations only. Ptrace is unchecked.
-2 - (suidsafe) - any binary which normally would not be dumped is dumped
-	readable by root only. This allows the end user to remove
-	such a dump but not access it directly. For security reasons
-	core dumps in this mode will not overwrite one another or
-	other files. This mode is appropriate when adminstrators are
-	attempting to debug problems in a normal environment.
-
-==============================================================
-
 tainted: 
 
 Non-zero if the kernel has been tainted.  Numeric values, which
-- 
cgit v0.10.2


From 7334bb4ae931159384acf168eacb0d5d6e0d083c Mon Sep 17 00:00:00 2001
From: Danny Tholen <obiwan@mailmij.org>
Date: Sun, 27 Aug 2006 01:23:29 -0700
Subject: [PATCH] 1394: fix for recently added firewire patch that breaks
 things on ppc

Recently a patch was added for preliminary suspend/resume handling on
!PPC_PMAC.  However, this broke both suspend and firewire on powerpc
because it saves the pci state after the device has already been disabled.

This moves the save state to before the pmac specific code.

Signed-off-by: Danny Tholen <obiwan@mailmij.org>
Cc: Stefan Richter <stefanr@s5r6.in-berlin.de>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Ben Collins <bcollins@ubuntu.com>
Cc: Jody McIntyre <scjody@modernduck.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c
index d4bad67..448df27 100644
--- a/drivers/ieee1394/ohci1394.c
+++ b/drivers/ieee1394/ohci1394.c
@@ -3552,6 +3552,8 @@ static int ohci1394_pci_resume (struct pci_dev *pdev)
 
 static int ohci1394_pci_suspend (struct pci_dev *pdev, pm_message_t state)
 {
+	pci_save_state(pdev);
+
 #ifdef CONFIG_PPC_PMAC
 	if (machine_is(powermac)) {
 		struct device_node *of_node;
@@ -3563,8 +3565,6 @@ static int ohci1394_pci_suspend (struct pci_dev *pdev, pm_message_t state)
 	}
 #endif
 
-	pci_save_state(pdev);
-
 	return 0;
 }
 
-- 
cgit v0.10.2


From 6946bd636364effce06ea46fe8f8cd6e2edb004e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sun, 27 Aug 2006 01:23:31 -0700
Subject: [PATCH] lockdep: fix blkdev_open() warning

On Wed, 2006-08-09 at 07:57 +0200, Rolf Eike Beer wrote:
> =============================================
> [ INFO: possible recursive locking detected ]
> ---------------------------------------------
> parted/7929 is trying to acquire lock:
>  (&bdev->bd_mutex){--..}, at: [<c105eb8d>] __blkdev_put+0x1e/0x13c
>
> but task is already holding lock:
>  (&bdev->bd_mutex){--..}, at: [<c105eec6>] do_open+0x72/0x3a8
>
> other info that might help us debug this:
> 1 lock held by parted/7929:
>  #0:  (&bdev->bd_mutex){--..}, at: [<c105eec6>] do_open+0x72/0x3a8
> stack backtrace:
>  [<c1003aad>] show_trace_log_lvl+0x58/0x15b
>  [<c100495f>] show_trace+0xd/0x10
>  [<c1004979>] dump_stack+0x17/0x1a
>  [<c102dee5>] __lock_acquire+0x753/0x99c
>  [<c102e3b0>] lock_acquire+0x4a/0x6a
>  [<c1204501>] mutex_lock_nested+0xc8/0x20c
>  [<c105eb8d>] __blkdev_put+0x1e/0x13c
>  [<c105ecc4>] blkdev_put+0xa/0xc
>  [<c105f18a>] do_open+0x336/0x3a8
>  [<c105f21b>] blkdev_open+0x1f/0x4c
>  [<c1057b40>] __dentry_open+0xc7/0x1aa
>  [<c1057c91>] nameidata_to_filp+0x1c/0x2e
>  [<c1057cd1>] do_filp_open+0x2e/0x35
>  [<c1057dd7>] do_sys_open+0x38/0x68
>  [<c1057e33>] sys_open+0x16/0x18
>  [<c1002845>] sysenter_past_esp+0x56/0x8d

OK, I'm having a look here; it's all new to me so bear with me.

blkdev_open() calls
  do_open(bdev, ...,BD_MUTEX_NORMAL) and takes
    mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_NORMAL)

then something fails, and we're thrown to:

out_first: where
    if (bdev != bdev->bd_contains)
      blkdev_put(bdev->bd_contains) which is
        __blkdev_put(bdev->bd_contains, BD_MUTEX_NORMAL) which does
          mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_NORMAL) <--- lockdep trigger

When going to out_first, bdev->bd_contains is either bdev or whole, and
since we take the branch it must be whole. So it seems to me the
following patch would be the right one:

[akpm@osdl.org: compile fix]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3753457..045f988 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -884,6 +884,61 @@ void bd_set_size(struct block_device *bdev, loff_t size)
 }
 EXPORT_SYMBOL(bd_set_size);
 
+static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
+{
+	int ret = 0;
+	struct inode *bd_inode = bdev->bd_inode;
+	struct gendisk *disk = bdev->bd_disk;
+
+	mutex_lock_nested(&bdev->bd_mutex, subclass);
+	lock_kernel();
+	if (!--bdev->bd_openers) {
+		sync_blockdev(bdev);
+		kill_bdev(bdev);
+	}
+	if (bdev->bd_contains == bdev) {
+		if (disk->fops->release)
+			ret = disk->fops->release(bd_inode, NULL);
+	} else {
+		mutex_lock_nested(&bdev->bd_contains->bd_mutex,
+				  subclass + 1);
+		bdev->bd_contains->bd_part_count--;
+		mutex_unlock(&bdev->bd_contains->bd_mutex);
+	}
+	if (!bdev->bd_openers) {
+		struct module *owner = disk->fops->owner;
+
+		put_disk(disk);
+		module_put(owner);
+
+		if (bdev->bd_contains != bdev) {
+			kobject_put(&bdev->bd_part->kobj);
+			bdev->bd_part = NULL;
+		}
+		bdev->bd_disk = NULL;
+		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
+		if (bdev != bdev->bd_contains)
+			__blkdev_put(bdev->bd_contains, subclass + 1);
+		bdev->bd_contains = NULL;
+	}
+	unlock_kernel();
+	mutex_unlock(&bdev->bd_mutex);
+	bdput(bdev);
+	return ret;
+}
+
+int blkdev_put(struct block_device *bdev)
+{
+	return __blkdev_put(bdev, BD_MUTEX_NORMAL);
+}
+EXPORT_SYMBOL(blkdev_put);
+
+int blkdev_put_partition(struct block_device *bdev)
+{
+	return __blkdev_put(bdev, BD_MUTEX_PARTITION);
+}
+EXPORT_SYMBOL(blkdev_put_partition);
+
 static int
 blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags);
 
@@ -980,7 +1035,7 @@ out_first:
 	bdev->bd_disk = NULL;
 	bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 	if (bdev != bdev->bd_contains)
-		blkdev_put(bdev->bd_contains);
+		__blkdev_put(bdev->bd_contains, BD_MUTEX_WHOLE);
 	bdev->bd_contains = NULL;
 	put_disk(disk);
 	module_put(owner);
@@ -1079,63 +1134,6 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	return res;
 }
 
-static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
-{
-	int ret = 0;
-	struct inode *bd_inode = bdev->bd_inode;
-	struct gendisk *disk = bdev->bd_disk;
-
-	mutex_lock_nested(&bdev->bd_mutex, subclass);
-	lock_kernel();
-	if (!--bdev->bd_openers) {
-		sync_blockdev(bdev);
-		kill_bdev(bdev);
-	}
-	if (bdev->bd_contains == bdev) {
-		if (disk->fops->release)
-			ret = disk->fops->release(bd_inode, NULL);
-	} else {
-		mutex_lock_nested(&bdev->bd_contains->bd_mutex,
-				  subclass + 1);
-		bdev->bd_contains->bd_part_count--;
-		mutex_unlock(&bdev->bd_contains->bd_mutex);
-	}
-	if (!bdev->bd_openers) {
-		struct module *owner = disk->fops->owner;
-
-		put_disk(disk);
-		module_put(owner);
-
-		if (bdev->bd_contains != bdev) {
-			kobject_put(&bdev->bd_part->kobj);
-			bdev->bd_part = NULL;
-		}
-		bdev->bd_disk = NULL;
-		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
-		if (bdev != bdev->bd_contains)
-			__blkdev_put(bdev->bd_contains, subclass + 1);
-		bdev->bd_contains = NULL;
-	}
-	unlock_kernel();
-	mutex_unlock(&bdev->bd_mutex);
-	bdput(bdev);
-	return ret;
-}
-
-int blkdev_put(struct block_device *bdev)
-{
-	return __blkdev_put(bdev, BD_MUTEX_NORMAL);
-}
-
-EXPORT_SYMBOL(blkdev_put);
-
-int blkdev_put_partition(struct block_device *bdev)
-{
-	return __blkdev_put(bdev, BD_MUTEX_PARTITION);
-}
-
-EXPORT_SYMBOL(blkdev_put_partition);
-
 static int blkdev_close(struct inode * inode, struct file * filp)
 {
 	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
-- 
cgit v0.10.2


From 7fd5aecc5d32882f8e29b14370e9ce2520e7e56f Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@rpsys.net>
Date: Sun, 27 Aug 2006 01:23:33 -0700
Subject: [PATCH] mtd corruption fix

Read the return value before we release the nand device otherwise the
value can become corrupted by another user of chip->ops, ultimately
resulting in filesystem corruption.

Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
Cc: David Woodhouse <dwmw2@infradead.org>
Acked-by: Josh Boyer <jwboyer@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 62b8613..c8cbc00 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1093,9 +1093,10 @@ static int nand_read(struct mtd_info *mtd, loff_t from, size_t len,
 
 	ret = nand_do_read_ops(mtd, from, &chip->ops);
 
+	*retlen = chip->ops.retlen;
+
 	nand_release_device(mtd);
 
-	*retlen = chip->ops.retlen;
 	return ret;
 }
 
@@ -1691,9 +1692,10 @@ static int nand_write(struct mtd_info *mtd, loff_t to, size_t len,
 
 	ret = nand_do_write_ops(mtd, to, &chip->ops);
 
+	*retlen = chip->ops.retlen;
+
 	nand_release_device(mtd);
 
-	*retlen = chip->ops.retlen;
 	return ret;
 }
 
-- 
cgit v0.10.2


From cb3e0fe3a5dabdc5eda50b825acb23bdfa2d1d55 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Sun, 27 Aug 2006 01:23:33 -0700
Subject: [PATCH] x86: Fix dmi detection of MacBookPro and iMac

Commit b64ef8afa58f397e1eaba2bd9ecaa6812064d464 ("[PATCH] add imacfb
documentation and detection") contained a wrong DMI_MATCH.

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/imacfb.c b/drivers/video/imacfb.c
index b485bec..18ea4a5 100644
--- a/drivers/video/imacfb.c
+++ b/drivers/video/imacfb.c
@@ -71,10 +71,10 @@ static int set_system(struct dmi_system_id *id)
 static struct dmi_system_id __initdata dmi_system_table[] = {
 	{ set_system, "iMac4,1", {
 	  DMI_MATCH(DMI_BIOS_VENDOR,"Apple Computer, Inc."),
-	  DMI_MATCH(DMI_BIOS_VERSION,"iMac4,1") }, (void*)M_I17},
+	  DMI_MATCH(DMI_PRODUCT_NAME,"iMac4,1") }, (void*)M_I17},
 	{ set_system, "MacBookPro1,1", {
 	  DMI_MATCH(DMI_BIOS_VENDOR,"Apple Computer, Inc."),
-	  DMI_MATCH(DMI_BIOS_VERSION,"MacBookPro1,1") }, (void*)M_I17},
+	  DMI_MATCH(DMI_PRODUCT_NAME,"MacBookPro1,1") }, (void*)M_I17},
 	{ set_system, "MacBook1,1", {
 	  DMI_MATCH(DMI_BIOS_VENDOR,"Apple Computer, Inc."),
 	  DMI_MATCH(DMI_PRODUCT_NAME,"MacBook1,1")}, (void *)M_MACBOOK},
-- 
cgit v0.10.2


From f8986c241dfd54d51c9eff967129a550ae230144 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Sun, 27 Aug 2006 01:23:34 -0700
Subject: [PATCH] revert "Drop tasklist lock in do_sched_setscheduler"

sched_setscheduler() looks at ->signal->rlim[].  It is unsafe to
dereference ->signal unless tasklist_lock or ->siglock is held (or p ==
current).  We pin the task structure, but this can't prevent
release_task()->__exit_signal() from setting ->signal = NULL.

Restore tasklist_lock across the setscheduler call.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/sched.c b/kernel/sched.c
index a2be2d0..a234fbe 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4162,10 +4162,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 		read_unlock_irq(&tasklist_lock);
 		return -ESRCH;
 	}
-	get_task_struct(p);
-	read_unlock_irq(&tasklist_lock);
 	retval = sched_setscheduler(p, policy, &lparam);
-	put_task_struct(p);
+	read_unlock_irq(&tasklist_lock);
 
 	return retval;
 }
-- 
cgit v0.10.2


From a0cc621f52a4dea10c34eeed6eb4e36b26db63dc Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Sun, 27 Aug 2006 01:23:35 -0700
Subject: [PATCH] cpufreq: acpi-cpufreq: Ignore failure from
 acpi_cpufreq_early_init_acpi

Ignore the return value of early_init_acpi(), as it can give false error
messages.  If there is something really wrong, then register_driver will
fail cleanly with EINVAL later.

[ background: modprobe acpi-cpufreq on systems not capable of speed-scaling
  started failing with 'invalid argument', where previously it would only
  ever fail with -ENODEV

  I'm not 100% happy with the solution. It'd be better to handle
  failure properly, but this is a low-impact change for 2.6.18
  We can always revisit doing this better in .19   --davej.]

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Dave Jones <davej@redhat.com>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index efb41e8..e6ea00e 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -567,16 +567,11 @@ static struct cpufreq_driver acpi_cpufreq_driver = {
 static int __init
 acpi_cpufreq_init (void)
 {
-	int                     result = 0;
-
 	dprintk("acpi_cpufreq_init\n");
 
-	result = acpi_cpufreq_early_init_acpi();
+	acpi_cpufreq_early_init_acpi();
 
-	if (!result)
- 		result = cpufreq_register_driver(&acpi_cpufreq_driver);
-	
-	return (result);
+ 	return cpufreq_register_driver(&acpi_cpufreq_driver);
 }
 
 
-- 
cgit v0.10.2


From 01cfaf0d12ae5fa092cc916ca4066ee1598e857d Mon Sep 17 00:00:00 2001
From: Dirk Eibach <eibach@gdsys.de>
Date: Sun, 27 Aug 2006 01:23:36 -0700
Subject: [PATCH] char/moxa.c: fix endianess and multiple-card issues

While testing Moxa C218T/PCI on PowerPC 405EP I found that loading firmware
using the linux kernel driver fails because calculation of the checksum is
not endianness-independent in the original code.

After I fixed this I found that uploading firmware in a system with
multiple cards causes a kernel oops.  I had a look in the recent moxa
sources and found that they do some kind of locking there.  Applying this
lock fixed the problem.

Alan sayeth:

  Checksum changes are clearly correct.  Other changes is an improvement but
  not I think enough to handle malicious firmware attacks.  That said such an
  attacker has CAP_SYS_RAWIO anyway so that part is irrelevant except for
  neatness.

[akpm@osdl.org: cleanups]
Signed-off-by: Dirk Eibach <eibach@gdsys.de>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c
index 4ea7bd5..a369dd6 100644
--- a/drivers/char/moxa.c
+++ b/drivers/char/moxa.c
@@ -142,6 +142,7 @@ typedef struct _moxa_board_conf {
 
 static moxa_board_conf moxa_boards[MAX_BOARDS];
 static void __iomem *moxaBaseAddr[MAX_BOARDS];
+static int loadstat[MAX_BOARDS];
 
 struct moxa_str {
 	int type;
@@ -1688,6 +1689,8 @@ int MoxaDriverPoll(void)
 	if (moxaCard == 0)
 		return (-1);
 	for (card = 0; card < MAX_BOARDS; card++) {
+	        if (loadstat[card] == 0)
+			continue;
 		if ((ports = moxa_boards[card].numPorts) == 0)
 			continue;
 		if (readb(moxaIntPend[card]) == 0xff) {
@@ -2903,6 +2906,7 @@ static int moxaloadcode(int cardno, unsigned char __user *tmp, int len)
 		}
 		break;
 	}
+	loadstat[cardno] = 1;
 	return (0);
 }
 
@@ -2920,7 +2924,7 @@ static int moxaloadc218(int cardno, void __iomem *baseAddr, int len)
 	len1 = len >> 1;
 	ptr = (ushort *) moxaBuff;
 	for (i = 0; i < len1; i++)
-		usum += *(ptr + i);
+		usum += le16_to_cpu(*(ptr + i));
 	retry = 0;
 	do {
 		len1 = len >> 1;
@@ -2992,7 +2996,7 @@ static int moxaloadc320(int cardno, void __iomem *baseAddr, int len, int *numPor
 	wlen = len >> 1;
 	uptr = (ushort *) moxaBuff;
 	for (i = 0; i < wlen; i++)
-		usum += uptr[i];
+		usum += le16_to_cpu(uptr[i]);
 	retry = 0;
 	j = 0;
 	do {
-- 
cgit v0.10.2


From b8cf368944807e29b16b24588a2a35c829bc9d9a Mon Sep 17 00:00:00 2001
From: "Paul A. Clarke" <pc@us.ibm.com>
Date: Sun, 27 Aug 2006 01:23:37 -0700
Subject: [PATCH] matroxfb: fix jittery display on non-ppc systems

I wish I was happier about this patch.  It'll serve as a placeholder for
the moment.  I'm still trying to get a G550 working in order to even
reproduce the problem this patch introduces.  I find that the G450 has
jitter even without this patch, so it won't show me what the patch changed.
 At this point, I'll continue trying to get the G550 to work, and in
parallel work with the G450 to work out the kinks.

The patch is below.

Set XDVICLKCTRL only on PPC, as doing this apparently introduces jitter on
the G550, at least on x86 architectures.

Signed-off-by: Paul A. Clarke <pc@us.ibm.com>
Signed-off-by: Petr Vandrovec <petr@vandrovec.name>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/matrox/g450_pll.c b/drivers/video/matrox/g450_pll.c
index 440272a..7c76e07 100644
--- a/drivers/video/matrox/g450_pll.c
+++ b/drivers/video/matrox/g450_pll.c
@@ -331,7 +331,15 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll,
 					tmp |= M1064_XPIXCLKCTRL_PLL_UP;
 				}
 				matroxfb_DAC_out(PMINFO M1064_XPIXCLKCTRL, tmp);
+#ifdef __powerpc__
+				/* This is necessary to avoid jitter on PowerPC
+				 * (OpenFirmware) systems, but apparently
+				 * introduces jitter, at least on a x86-64
+				 * using DVI.
+				 * A simple workaround is disable for non-PPC.
+				 */
 				matroxfb_DAC_out(PMINFO M1064_XDVICLKCTRL, 0);
+#endif /* __powerpc__ */
 				matroxfb_DAC_out(PMINFO M1064_XPWRCTRL, xpwrctrl);
 
 				matroxfb_DAC_unlock_irqrestore(flags);
-- 
cgit v0.10.2


From 533475d3d48eb839be2b57f6b020150abae91063 Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Sun, 27 Aug 2006 01:23:39 -0700
Subject: [PATCH] vcsa attribute bits -> ioctl(VT_GETHIFONTMASK)

When reading /dev/vcsa while a font with more than 256 characters is
loaded, one of the attribute bits records the 9th bit of the character.
But depending on the console driver (vgacon or fbcon for instance), that's
bit 3 or bit 0.  And there is no way for userland to know that, thus no way
for userland to safely grab the screen content.  So here is a (tested)
patch:

Add a VT_GETHIFONTMASK ioctl for knowing which bit is the 9th bit for VC
text (vc_hi_font_mask field of the vc_data structure).

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c
index eccffaf..a5628a8 100644
--- a/drivers/char/vt_ioctl.c
+++ b/drivers/char/vt_ioctl.c
@@ -1011,6 +1011,8 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
 		   return -EPERM;
 		vt_dont_switch = 0;
 		return 0;
+	case VT_GETHIFONTMASK:
+		return put_user(vc->vc_hi_font_mask, (unsigned short __user *)arg);
 	default:
 		return -ENOIOCTLCMD;
 	}
diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h
index 269d000..bea0255 100644
--- a/include/linux/compat_ioctl.h
+++ b/include/linux/compat_ioctl.h
@@ -216,6 +216,7 @@ COMPATIBLE_IOCTL(VT_RESIZE)
 COMPATIBLE_IOCTL(VT_RESIZEX)
 COMPATIBLE_IOCTL(VT_LOCKSWITCH)
 COMPATIBLE_IOCTL(VT_UNLOCKSWITCH)
+COMPATIBLE_IOCTL(VT_GETHIFONTMASK)
 /* Little p (/dev/rtc, /dev/envctrl, etc.) */
 COMPATIBLE_IOCTL(RTC_AIE_ON)
 COMPATIBLE_IOCTL(RTC_AIE_OFF)
diff --git a/include/linux/vt.h b/include/linux/vt.h
index 8ab334a..ba806e8 100644
--- a/include/linux/vt.h
+++ b/include/linux/vt.h
@@ -60,5 +60,6 @@ struct vt_consize {
 #define VT_RESIZEX      0x560A  /* set kernel's idea of screensize + more */
 #define VT_LOCKSWITCH   0x560B  /* disallow vt switching */
 #define VT_UNLOCKSWITCH 0x560C  /* allow vt switching */
+#define VT_GETHIFONTMASK 0x560D  /* return hi font mask */
 
 #endif /* _LINUX_VT_H */
-- 
cgit v0.10.2


From d015baebba44613ef59ddffeae2114fa4ede7104 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Sun, 27 Aug 2006 01:23:40 -0700
Subject: [PATCH] futex_find_get_task(): remove an obscure EXIT_ZOMBIE check

futex_find_get_task:

	if (p->state == EXIT_ZOMBIE || p->exit_state == EXIT_ZOMBIE)
		return NULL;

I can't understand this.  First, p->state can't be EXIT_ZOMBIE.  The
->exit_state check looks strange too.  Sub-threads or tasks whose ->parent
ignores SIGCHLD go directly to EXIT_DEAD state (I am ignoring a ptrace
case).  Why EXIT_DEAD tasks should be ok?  Yes, EXIT_ZOMBIE is more
important (a task may stay zombie for a long time), but this doesn't mean
we should explicitly ignore other EXIT_XXX states.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/futex.c b/kernel/futex.c
index d4633c5..b9b8aea 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -397,7 +397,7 @@ static struct task_struct * futex_find_get_task(pid_t pid)
 		p = NULL;
 		goto out_unlock;
 	}
-	if (p->state == EXIT_ZOMBIE || p->exit_state == EXIT_ZOMBIE) {
+	if (p->exit_state != 0) {
 		p = NULL;
 		goto out_unlock;
 	}
-- 
cgit v0.10.2


From fb8d81e47783f9198f3d6248bd4c0d16a1d5424e Mon Sep 17 00:00:00 2001
From: Jonathan McDowell <noodles@earth.li>
Date: Sun, 27 Aug 2006 01:23:41 -0700
Subject: [PATCH] MTD NAND: Fix ams-delta after core conversion

The recent hwctrl core conversion for MTD NAND devices broke the Amstrad
Delta driver.  This fixes it up and uses the existing control line defines
rather than unclear magic numbers.

Signed-off-by: Jonathan McDowell <noodles@earth.li>
Acked-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/mtd/nand/ams-delta.c b/drivers/mtd/nand/ams-delta.c
index d7897dc..a0ba07c 100644
--- a/drivers/mtd/nand/ams-delta.c
+++ b/drivers/mtd/nand/ams-delta.c
@@ -130,11 +130,13 @@ static void ams_delta_hwcontrol(struct mtd_info *mtd, int cmd,
 	if (ctrl & NAND_CTRL_CHANGE) {
 		unsigned long bits;
 
-		bits = (~ctrl & NAND_NCE) << 2;
-		bits |= (ctrl & NAND_CLE) << 7;
-		bits |= (ctrl & NAND_ALE) << 6;
+		bits = (~ctrl & NAND_NCE) ? AMS_DELTA_LATCH2_NAND_NCE : 0;
+		bits |= (ctrl & NAND_CLE) ? AMS_DELTA_LATCH2_NAND_CLE : 0;
+		bits |= (ctrl & NAND_ALE) ? AMS_DELTA_LATCH2_NAND_ALE : 0;
 
-		ams_delta_latch2_write(0xC2, bits);
+		ams_delta_latch2_write(AMS_DELTA_LATCH2_NAND_CLE |
+				AMS_DELTA_LATCH2_NAND_ALE |
+				AMS_DELTA_LATCH2_NAND_NCE, bits);
 	}
 
 	if (cmd != NAND_CMD_NONE)
-- 
cgit v0.10.2


From f5fb09fa3392ad43fbcfc2f4580752f383ab5996 Mon Sep 17 00:00:00 2001
From: Andries Brouwer <Andries.Brouwer@cwi.nl>
Date: Sun, 27 Aug 2006 01:23:42 -0700
Subject: [PATCH] Fix for minix crash

Mounting a (corrupt) minix filesystem with zero s_zmap_blocks
gives a spectacular crash on my 2.6.17.8 system, no doubt
because minix/inode.c does an unconditional
	minix_set_bit(0,sbi->s_zmap[0]->b_data);

[akpm@osdl.org: make labels consistent while we're there]

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 9ea91c5..330ff9f 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -204,6 +204,8 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 	/*
 	 * Allocate the buffer map to keep the superblock small.
 	 */
+	if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
+		goto out_illegal_sb;
 	i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh);
 	map = kmalloc(i, GFP_KERNEL);
 	if (!map)
@@ -263,7 +265,7 @@ out_no_root:
 
 out_no_bitmap:
 	printk("MINIX-fs: bad superblock or unable to read bitmaps\n");
-    out_freemap:
+out_freemap:
 	for (i = 0; i < sbi->s_imap_blocks; i++)
 		brelse(sbi->s_imap[i]);
 	for (i = 0; i < sbi->s_zmap_blocks; i++)
@@ -276,11 +278,16 @@ out_no_map:
 		printk("MINIX-fs: can't allocate map\n");
 	goto out_release;
 
+out_illegal_sb:
+	if (!silent)
+		printk("MINIX-fs: bad superblock\n");
+	goto out_release;
+
 out_no_fs:
 	if (!silent)
 		printk("VFS: Can't find a Minix or Minix V2 filesystem "
 			"on device %s\n", s->s_id);
-    out_release:
+out_release:
 	brelse(bh);
 	goto out;
 
@@ -290,7 +297,7 @@ out_bad_hblock:
 
 out_bad_sb:
 	printk("MINIX-fs: unable to read superblock\n");
- out:
+out:
 	s->s_fs_info = NULL;
 	kfree(sbi);
 	return -EINVAL;
-- 
cgit v0.10.2


From 607eb266aea9dd2abe515985e12c5cd8b32546e8 Mon Sep 17 00:00:00 2001
From: Andries Brouwer <Andries.Brouwer@cwi.nl>
Date: Sun, 27 Aug 2006 01:23:43 -0700
Subject: [PATCH] ext2: prevent div-by-zero on corrupted fs

Mounting an ext2 filesystem with zero s_inodes_per_group will cause a
divide error.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f2702cd..681dea8 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -775,7 +775,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	if (EXT2_INODE_SIZE(sb) == 0)
 		goto cantfind_ext2;
 	sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb);
-	if (sbi->s_inodes_per_block == 0)
+	if (sbi->s_inodes_per_block == 0 || sbi->s_inodes_per_group == 0)
 		goto cantfind_ext2;
 	sbi->s_itb_per_group = sbi->s_inodes_per_group /
 					sbi->s_inodes_per_block;
-- 
cgit v0.10.2


From 08fb306fe63d98eb86e3b16f4cc21816fa47f18e Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Sun, 27 Aug 2006 01:23:44 -0700
Subject: [PATCH] ext3 filesystem bogus ENOSPC with reservation fix

To handle the earlier bogus ENOSPC error caused by filesystem full of block
reservation, current code falls back to non block reservation, starts to
allocate block(s) from the goal allocation block group as if there is no
block reservation.

Current code needs to re-load the corresponding block group descriptor for
the initial goal block group in this case.  The patch fixes this.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index a504a40..063d994 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1269,12 +1269,12 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
 		goal = le32_to_cpu(es->s_first_data_block);
 	group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
 			EXT3_BLOCKS_PER_GROUP(sb);
+	goal_group = group_no;
+retry_alloc:
 	gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
 	if (!gdp)
 		goto io_error;
 
-	goal_group = group_no;
-retry:
 	free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
 	/*
 	 * if there is not enough free blocks to make a new resevation
@@ -1349,7 +1349,7 @@ retry:
 	if (my_rsv) {
 		my_rsv = NULL;
 		group_no = goal_group;
-		goto retry;
+		goto retry_alloc;
 	}
 	/* No space left on the device */
 	*errp = -ENOSPC;
-- 
cgit v0.10.2


From c37336b078ba9d2ff38c535b194996a7ad6e69f8 Mon Sep 17 00:00:00 2001
From: Evgeniy Dushistov <dushistov@mail.ru>
Date: Sun, 27 Aug 2006 01:23:45 -0700
Subject: [PATCH] ufs: write to hole in big file

On UFS, this scenario:
	open(O_TRUNC)
	lseek(1024 * 1024 * 80)
	write("A")
	lseek(1024 * 2)
	write("A")

may cause access to invalid address.

This happened because of "goal" is calculated in wrong way in block
allocation path, as I see this problem exists also in 2.4.

We use construction like this i_data[lastfrag], i_data array of pointers to
direct blocks, indirect and so on, it has a certain size ~20 elements, and
lastfrag may have value for example 40000.

Also this patch fixes related to handling such scenario issues, wrong
zeroing metadata, in case of block(not fragment) allocation, and wrong goal
calculation, when we allocate block

Signed-off-by: Evgeniy Dushistov <dushistov@mail.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index e7c8615..30c6e8a9 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -169,18 +169,20 @@ static void ufs_clear_frag(struct inode *inode, struct buffer_head *bh)
 
 static struct buffer_head *
 ufs_clear_frags(struct inode *inode, sector_t beg,
-		unsigned int n)
+		unsigned int n, sector_t want)
 {
-	struct buffer_head *res, *bh;
+	struct buffer_head *res = NULL, *bh;
 	sector_t end = beg + n;
 
-	res = sb_getblk(inode->i_sb, beg);
-	ufs_clear_frag(inode, res);
-	for (++beg; beg < end; ++beg) {
+	for (; beg < end; ++beg) {
 		bh = sb_getblk(inode->i_sb, beg);
 		ufs_clear_frag(inode, bh);
-		brelse(bh);
+		if (want != beg)
+			brelse(bh);
+		else
+			res = bh;
 	}
+	BUG_ON(!res);
 	return res;
 }
 
@@ -265,7 +267,9 @@ repeat:
 			lastfrag = ufsi->i_lastfrag;
 			
 		}
-		goal = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]) + uspi->s_fpb;
+		tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]);
+		if (tmp)
+			goal = tmp + uspi->s_fpb;
 		tmp = ufs_new_fragments (inode, p, fragment - blockoff, 
 					 goal, required + blockoff,
 					 err, locked_page);
@@ -277,13 +281,15 @@ repeat:
 		tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff),
 					fs32_to_cpu(sb, *p), required +  (blockoff - lastblockoff),
 					err, locked_page);
-	}
+	} else /* (lastblock > block) */ {
 	/*
 	 * We will allocate new block before last allocated block
 	 */
-	else /* (lastblock > block) */ {
-		if (lastblock && (tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock-1])))
-			goal = tmp + uspi->s_fpb;
+		if (block) {
+			tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[block-1]);
+			if (tmp)
+				goal = tmp + uspi->s_fpb;
+		}
 		tmp = ufs_new_fragments(inode, p, fragment - blockoff,
 					goal, uspi->s_fpb, err, locked_page);
 	}
@@ -296,7 +302,7 @@ repeat:
 	}
 
 	if (!phys) {
-		result = ufs_clear_frags(inode, tmp + blockoff, required);
+		result = ufs_clear_frags(inode, tmp, required, tmp + blockoff);
 	} else {
 		*phys = tmp + blockoff;
 		result = NULL;
@@ -383,7 +389,7 @@ repeat:
 		}
 	}
 
-	if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1]) + uspi->s_fpb))
+	if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1])))
 		goal = tmp + uspi->s_fpb;
 	else
 		goal = bh->b_blocknr + uspi->s_fpb;
@@ -397,7 +403,8 @@ repeat:
 
 
 	if (!phys) {
-		result = ufs_clear_frags(inode, tmp + blockoff, uspi->s_fpb);
+		result = ufs_clear_frags(inode, tmp, uspi->s_fpb,
+					 tmp + blockoff);
 	} else {
 		*phys = tmp + blockoff;
 		*new = 1;
-- 
cgit v0.10.2


From ecdc63948763586e101108dfe1ba316ec069fe39 Mon Sep 17 00:00:00 2001
From: Evgeniy Dushistov <dushistov@mail.ru>
Date: Sun, 27 Aug 2006 01:23:46 -0700
Subject: [PATCH] ufs: truncate correction

1) When we allocated last fragment in ufs_truncate, we read page, check
   if block mapped to address, and if not trying to allocate it.  This is
   wrong behaviour, fragment may be NOT allocated, but mapped, this
   happened because of "block map" function not checked allocated fragment
   or not, it just take address of the first fragment in the block, add
   offset of fragment and return result, this is correct behaviour in
   almost all situation except call from ufs_truncate.

2) Almost all implementation of UFS, which I can investigate have such
   "defect": if you have full disk, and try truncate file, for example 3GB
   to 2MB, and have hole in this region, truncate return -ENOSPC.  I tried
   evade from this problem, but "block allocation" algorithm is tied to
   right value of i_lastfrag, and fix of this corner case may slow down of
   ordinaries scenarios, so this patch makes behavior of "truncate"
   operations similar to what other UFS implementations do.

Signed-off-by: Evgeniy Dushistov <dushistov@mail.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index c9b5587..ea11d04 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -375,17 +375,15 @@ static int ufs_alloc_lastblock(struct inode *inode)
 	int err = 0;
 	struct address_space *mapping = inode->i_mapping;
 	struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi;
-	struct ufs_inode_info *ufsi = UFS_I(inode);
 	unsigned lastfrag, i, end;
 	struct page *lastpage;
 	struct buffer_head *bh;
 
 	lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift;
 
-	if (!lastfrag) {
-		ufsi->i_lastfrag = 0;
+	if (!lastfrag)
 		goto out;
-	}
+
 	lastfrag--;
 
 	lastpage = ufs_get_locked_page(mapping, lastfrag >>
@@ -400,25 +398,25 @@ static int ufs_alloc_lastblock(struct inode *inode)
        for (i = 0; i < end; ++i)
                bh = bh->b_this_page;
 
-       if (!buffer_mapped(bh)) {
-               err = ufs_getfrag_block(inode, lastfrag, bh, 1);
-
-               if (unlikely(err))
-                       goto out_unlock;
-
-               if (buffer_new(bh)) {
-                       clear_buffer_new(bh);
-                       unmap_underlying_metadata(bh->b_bdev,
-						 bh->b_blocknr);
-		       /*
-			* we do not zeroize fragment, because of
-			* if it maped to hole, it already contains zeroes
-			*/
-                       set_buffer_uptodate(bh);
-                       mark_buffer_dirty(bh);
-                       set_page_dirty(lastpage);
-               }
+
+       err = ufs_getfrag_block(inode, lastfrag, bh, 1);
+
+       if (unlikely(err))
+	       goto out_unlock;
+
+       if (buffer_new(bh)) {
+	       clear_buffer_new(bh);
+	       unmap_underlying_metadata(bh->b_bdev,
+					 bh->b_blocknr);
+	       /*
+		* we do not zeroize fragment, because of
+		* if it maped to hole, it already contains zeroes
+		*/
+	       set_buffer_uptodate(bh);
+	       mark_buffer_dirty(bh);
+	       set_page_dirty(lastpage);
        }
+
 out_unlock:
        ufs_put_locked_page(lastpage);
 out:
@@ -440,23 +438,11 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
 	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
 		return -EPERM;
 
-	if (inode->i_size > old_i_size) {
-		/*
-		 * if we expand file we should care about
-		 * allocation of block for last byte first of all
-		 */
-		err = ufs_alloc_lastblock(inode);
+	err = ufs_alloc_lastblock(inode);
 
-		if (err) {
-			i_size_write(inode, old_i_size);
-			goto out;
-		}
-		/*
-		 * go away, because of we expand file, and we do not
-		 * need free blocks, and zeroizes page
-		 */
-		lock_kernel();
-		goto almost_end;
+	if (err) {
+		i_size_write(inode, old_i_size);
+		goto out;
 	}
 
 	block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block);
@@ -477,21 +463,8 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
 		yield();
 	}
 
-	if (inode->i_size < old_i_size) {
-		/*
-		 * now we should have enough space
-		 * to allocate block for last byte
-		 */
-		err = ufs_alloc_lastblock(inode);
-		if (err)
-			/*
-			 * looks like all the same - we have no space,
-			 * but we truncate file already
-			 */
-			inode->i_size = (ufsi->i_lastfrag - 1) * uspi->s_fsize;
-	}
-almost_end:
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+	ufsi->i_lastfrag = DIRECT_FRAGMENT;
 	unlock_kernel();
 	mark_inode_dirty(inode);
 out:
-- 
cgit v0.10.2


From 4edb9a143e31d2e191c199262226e1a5923ff8f7 Mon Sep 17 00:00:00 2001
From: Yingchao Zhou <yingchao.zhou@gmail.com>
Date: Sun, 27 Aug 2006 01:23:46 -0700
Subject: [PATCH] Remove redundant up() in stop_machine()

An up() is called in kernel/stop_machine.c on failure, and also in the
caller (unconditionally).

Signed-off-by: Zhou Yingchao <yingchao.zhou@gmail.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index dcfb5d7..51cacd1 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -111,7 +111,6 @@ static int stop_machine(void)
 	/* If some failed, kill them all. */
 	if (ret < 0) {
 		stopmachine_set_state(STOPMACHINE_EXIT);
-		up(&stopmachine_mutex);
 		return ret;
 	}
 
-- 
cgit v0.10.2


From e88d78f6ba50d773096e26ca3f5c2464853c682d Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@us.ibm.com>
Date: Sun, 27 Aug 2006 01:23:47 -0700
Subject: [PATCH] Documentation update for relay interface

Here's updated documentation for the relay interface, rewritten to match
the relayfs->relay changes.  It also moves relayfs.txt to relay.txt in the
process.

It includes the changes to relayfs.txt previously posted by Randy Dunlap,
thanks for those.

The relay-apps examples have also been updated to match, and can be found
on the sourceforge relayfs website.

Signed-off-by: Tom Zanussi <zanussi@us.ibm.com>
Cc: "Randy.Dunlap" <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 66fdc07..16dec61 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -62,8 +62,8 @@ ramfs-rootfs-initramfs.txt
 	- info on the 'in memory' filesystems ramfs, rootfs and initramfs.
 reiser4.txt
 	- info on the Reiser4 filesystem based on dancing tree algorithms.
-relayfs.txt
-	- info on relayfs, for efficient streaming from kernel to user space.
+relay.txt
+	- info on relay, for efficient streaming from kernel to user space.
 romfs.txt
 	- description of the ROMFS filesystem.
 smbfs.txt
diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.txt
new file mode 100644
index 0000000..d6788da
--- /dev/null
+++ b/Documentation/filesystems/relay.txt
@@ -0,0 +1,479 @@
+relay interface (formerly relayfs)
+==================================
+
+The relay interface provides a means for kernel applications to
+efficiently log and transfer large quantities of data from the kernel
+to userspace via user-defined 'relay channels'.
+
+A 'relay channel' is a kernel->user data relay mechanism implemented
+as a set of per-cpu kernel buffers ('channel buffers'), each
+represented as a regular file ('relay file') in user space.  Kernel
+clients write into the channel buffers using efficient write
+functions; these automatically log into the current cpu's channel
+buffer.  User space applications mmap() or read() from the relay files
+and retrieve the data as it becomes available.  The relay files
+themselves are files created in a host filesystem, e.g. debugfs, and
+are associated with the channel buffers using the API described below.
+
+The format of the data logged into the channel buffers is completely
+up to the kernel client; the relay interface does however provide
+hooks which allow kernel clients to impose some structure on the
+buffer data.  The relay interface doesn't implement any form of data
+filtering - this also is left to the kernel client.  The purpose is to
+keep things as simple as possible.
+
+This document provides an overview of the relay interface API.  The
+details of the function parameters are documented along with the
+functions in the relay interface code - please see that for details.
+
+Semantics
+=========
+
+Each relay channel has one buffer per CPU, each buffer has one or more
+sub-buffers.  Messages are written to the first sub-buffer until it is
+too full to contain a new message, in which case it is written to
+the next (if available).  Messages are never split across sub-buffers.
+At this point, userspace can be notified so it empties the first
+sub-buffer, while the kernel continues writing to the next.
+
+When notified that a sub-buffer is full, the kernel knows how many
+bytes of it are padding i.e. unused space occurring because a complete
+message couldn't fit into a sub-buffer.  Userspace can use this
+knowledge to copy only valid data.
+
+After copying it, userspace can notify the kernel that a sub-buffer
+has been consumed.
+
+A relay channel can operate in a mode where it will overwrite data not
+yet collected by userspace, and not wait for it to be consumed.
+
+The relay channel itself does not provide for communication of such
+data between userspace and kernel, allowing the kernel side to remain
+simple and not impose a single interface on userspace.  It does
+provide a set of examples and a separate helper though, described
+below.
+
+The read() interface both removes padding and internally consumes the
+read sub-buffers; thus in cases where read(2) is being used to drain
+the channel buffers, special-purpose communication between kernel and
+user isn't necessary for basic operation.
+
+One of the major goals of the relay interface is to provide a low
+overhead mechanism for conveying kernel data to userspace.  While the
+read() interface is easy to use, it's not as efficient as the mmap()
+approach; the example code attempts to make the tradeoff between the
+two approaches as small as possible.
+
+klog and relay-apps example code
+================================
+
+The relay interface itself is ready to use, but to make things easier,
+a couple simple utility functions and a set of examples are provided.
+
+The relay-apps example tarball, available on the relay sourceforge
+site, contains a set of self-contained examples, each consisting of a
+pair of .c files containing boilerplate code for each of the user and
+kernel sides of a relay application.  When combined these two sets of
+boilerplate code provide glue to easily stream data to disk, without
+having to bother with mundane housekeeping chores.
+
+The 'klog debugging functions' patch (klog.patch in the relay-apps
+tarball) provides a couple of high-level logging functions to the
+kernel which allow writing formatted text or raw data to a channel,
+regardless of whether a channel to write into exists or not, or even
+whether the relay interface is compiled into the kernel or not.  These
+functions allow you to put unconditional 'trace' statements anywhere
+in the kernel or kernel modules; only when there is a 'klog handler'
+registered will data actually be logged (see the klog and kleak
+examples for details).
+
+It is of course possible to use the relay interface from scratch,
+i.e. without using any of the relay-apps example code or klog, but
+you'll have to implement communication between userspace and kernel,
+allowing both to convey the state of buffers (full, empty, amount of
+padding).  The read() interface both removes padding and internally
+consumes the read sub-buffers; thus in cases where read(2) is being
+used to drain the channel buffers, special-purpose communication
+between kernel and user isn't necessary for basic operation.  Things
+such as buffer-full conditions would still need to be communicated via
+some channel though.
+
+klog and the relay-apps examples can be found in the relay-apps
+tarball on http://relayfs.sourceforge.net
+
+The relay interface user space API
+==================================
+
+The relay interface implements basic file operations for user space
+access to relay channel buffer data.  Here are the file operations
+that are available and some comments regarding their behavior:
+
+open()	    enables user to open an _existing_ channel buffer.
+
+mmap()      results in channel buffer being mapped into the caller's
+	    memory space. Note that you can't do a partial mmap - you
+	    must map the entire file, which is NRBUF * SUBBUFSIZE.
+
+read()      read the contents of a channel buffer.  The bytes read are
+	    'consumed' by the reader, i.e. they won't be available
+	    again to subsequent reads.  If the channel is being used
+	    in no-overwrite mode (the default), it can be read at any
+	    time even if there's an active kernel writer.  If the
+	    channel is being used in overwrite mode and there are
+	    active channel writers, results may be unpredictable -
+	    users should make sure that all logging to the channel has
+	    ended before using read() with overwrite mode.  Sub-buffer
+	    padding is automatically removed and will not be seen by
+	    the reader.
+
+sendfile()  transfer data from a channel buffer to an output file
+	    descriptor. Sub-buffer padding is automatically removed
+	    and will not be seen by the reader.
+
+poll()      POLLIN/POLLRDNORM/POLLERR supported.  User applications are
+	    notified when sub-buffer boundaries are crossed.
+
+close()     decrements the channel buffer's refcount.  When the refcount
+	    reaches 0, i.e. when no process or kernel client has the
+	    buffer open, the channel buffer is freed.
+
+In order for a user application to make use of relay files, the
+host filesystem must be mounted.  For example,
+
+	mount -t debugfs debugfs /debug
+
+NOTE:   the host filesystem doesn't need to be mounted for kernel
+	clients to create or use channels - it only needs to be
+	mounted when user space applications need access to the buffer
+	data.
+
+
+The relay interface kernel API
+==============================
+
+Here's a summary of the API the relay interface provides to in-kernel clients:
+
+TBD(curr. line MT:/API/)
+  channel management functions:
+
+    relay_open(base_filename, parent, subbuf_size, n_subbufs,
+               callbacks)
+    relay_close(chan)
+    relay_flush(chan)
+    relay_reset(chan)
+
+  channel management typically called on instigation of userspace:
+
+    relay_subbufs_consumed(chan, cpu, subbufs_consumed)
+
+  write functions:
+
+    relay_write(chan, data, length)
+    __relay_write(chan, data, length)
+    relay_reserve(chan, length)
+
+  callbacks:
+
+    subbuf_start(buf, subbuf, prev_subbuf, prev_padding)
+    buf_mapped(buf, filp)
+    buf_unmapped(buf, filp)
+    create_buf_file(filename, parent, mode, buf, is_global)
+    remove_buf_file(dentry)
+
+  helper functions:
+
+    relay_buf_full(buf)
+    subbuf_start_reserve(buf, length)
+
+
+Creating a channel
+------------------
+
+relay_open() is used to create a channel, along with its per-cpu
+channel buffers.  Each channel buffer will have an associated file
+created for it in the host filesystem, which can be mmapped or
+read from in user space.  The files are named basename0...basenameN-1
+where N is the number of online cpus, and by default will be created
+in the root of the filesystem (if the parent param is NULL).  If you
+want a directory structure to contain your relay files, you should
+create it using the host filesystem's directory creation function,
+e.g. debugfs_create_dir(), and pass the parent directory to
+relay_open().  Users are responsible for cleaning up any directory
+structure they create, when the channel is closed - again the host
+filesystem's directory removal functions should be used for that,
+e.g. debugfs_remove().
+
+In order for a channel to be created and the host filesystem's files
+associated with its channel buffers, the user must provide definitions
+for two callback functions, create_buf_file() and remove_buf_file().
+create_buf_file() is called once for each per-cpu buffer from
+relay_open() and allows the user to create the file which will be used
+to represent the corresponding channel buffer.  The callback should
+return the dentry of the file created to represent the channel buffer.
+remove_buf_file() must also be defined; it's responsible for deleting
+the file(s) created in create_buf_file() and is called during
+relay_close().
+
+Here are some typical definitions for these callbacks, in this case
+using debugfs:
+
+/*
+ * create_buf_file() callback.  Creates relay file in debugfs.
+ */
+static struct dentry *create_buf_file_handler(const char *filename,
+                                              struct dentry *parent,
+                                              int mode,
+                                              struct rchan_buf *buf,
+                                              int *is_global)
+{
+        return debugfs_create_file(filename, mode, parent, buf,
+	                           &relay_file_operations);
+}
+
+/*
+ * remove_buf_file() callback.  Removes relay file from debugfs.
+ */
+static int remove_buf_file_handler(struct dentry *dentry)
+{
+        debugfs_remove(dentry);
+
+        return 0;
+}
+
+/*
+ * relay interface callbacks
+ */
+static struct rchan_callbacks relay_callbacks =
+{
+        .create_buf_file = create_buf_file_handler,
+        .remove_buf_file = remove_buf_file_handler,
+};
+
+And an example relay_open() invocation using them:
+
+  chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks);
+
+If the create_buf_file() callback fails, or isn't defined, channel
+creation and thus relay_open() will fail.
+
+The total size of each per-cpu buffer is calculated by multiplying the
+number of sub-buffers by the sub-buffer size passed into relay_open().
+The idea behind sub-buffers is that they're basically an extension of
+double-buffering to N buffers, and they also allow applications to
+easily implement random-access-on-buffer-boundary schemes, which can
+be important for some high-volume applications.  The number and size
+of sub-buffers is completely dependent on the application and even for
+the same application, different conditions will warrant different
+values for these parameters at different times.  Typically, the right
+values to use are best decided after some experimentation; in general,
+though, it's safe to assume that having only 1 sub-buffer is a bad
+idea - you're guaranteed to either overwrite data or lose events
+depending on the channel mode being used.
+
+The create_buf_file() implementation can also be defined in such a way
+as to allow the creation of a single 'global' buffer instead of the
+default per-cpu set.  This can be useful for applications interested
+mainly in seeing the relative ordering of system-wide events without
+the need to bother with saving explicit timestamps for the purpose of
+merging/sorting per-cpu files in a postprocessing step.
+
+To have relay_open() create a global buffer, the create_buf_file()
+implementation should set the value of the is_global outparam to a
+non-zero value in addition to creating the file that will be used to
+represent the single buffer.  In the case of a global buffer,
+create_buf_file() and remove_buf_file() will be called only once.  The
+normal channel-writing functions, e.g. relay_write(), can still be
+used - writes from any cpu will transparently end up in the global
+buffer - but since it is a global buffer, callers should make sure
+they use the proper locking for such a buffer, either by wrapping
+writes in a spinlock, or by copying a write function from relay.h and
+creating a local version that internally does the proper locking.
+
+Channel 'modes'
+---------------
+
+relay channels can be used in either of two modes - 'overwrite' or
+'no-overwrite'.  The mode is entirely determined by the implementation
+of the subbuf_start() callback, as described below.  The default if no
+subbuf_start() callback is defined is 'no-overwrite' mode.  If the
+default mode suits your needs, and you plan to use the read()
+interface to retrieve channel data, you can ignore the details of this
+section, as it pertains mainly to mmap() implementations.
+
+In 'overwrite' mode, also known as 'flight recorder' mode, writes
+continuously cycle around the buffer and will never fail, but will
+unconditionally overwrite old data regardless of whether it's actually
+been consumed.  In no-overwrite mode, writes will fail, i.e. data will
+be lost, if the number of unconsumed sub-buffers equals the total
+number of sub-buffers in the channel.  It should be clear that if
+there is no consumer or if the consumer can't consume sub-buffers fast
+enough, data will be lost in either case; the only difference is
+whether data is lost from the beginning or the end of a buffer.
+
+As explained above, a relay channel is made up of one or more
+per-cpu channel buffers, each implemented as a circular buffer
+subdivided into one or more sub-buffers.  Messages are written into
+the current sub-buffer of the channel's current per-cpu buffer via the
+write functions described below.  Whenever a message can't fit into
+the current sub-buffer, because there's no room left for it, the
+client is notified via the subbuf_start() callback that a switch to a
+new sub-buffer is about to occur.  The client uses this callback to 1)
+initialize the next sub-buffer if appropriate 2) finalize the previous
+sub-buffer if appropriate and 3) return a boolean value indicating
+whether or not to actually move on to the next sub-buffer.
+
+To implement 'no-overwrite' mode, the kernel client would provide
+an implementation of the subbuf_start() callback something like the
+following:
+
+static int subbuf_start(struct rchan_buf *buf,
+                        void *subbuf,
+			void *prev_subbuf,
+			unsigned int prev_padding)
+{
+	if (prev_subbuf)
+		*((unsigned *)prev_subbuf) = prev_padding;
+
+	if (relay_buf_full(buf))
+		return 0;
+
+	subbuf_start_reserve(buf, sizeof(unsigned int));
+
+	return 1;
+}
+
+If the current buffer is full, i.e. all sub-buffers remain unconsumed,
+the callback returns 0 to indicate that the buffer switch should not
+occur yet, i.e. until the consumer has had a chance to read the
+current set of ready sub-buffers.  For the relay_buf_full() function
+to make sense, the consumer is responsible for notifying the relay
+interface when sub-buffers have been consumed via
+relay_subbufs_consumed().  Any subsequent attempts to write into the
+buffer will again invoke the subbuf_start() callback with the same
+parameters; only when the consumer has consumed one or more of the
+ready sub-buffers will relay_buf_full() return 0, in which case the
+buffer switch can continue.
+
+The implementation of the subbuf_start() callback for 'overwrite' mode
+would be very similar:
+
+static int subbuf_start(struct rchan_buf *buf,
+                        void *subbuf,
+			void *prev_subbuf,
+			unsigned int prev_padding)
+{
+	if (prev_subbuf)
+		*((unsigned *)prev_subbuf) = prev_padding;
+
+	subbuf_start_reserve(buf, sizeof(unsigned int));
+
+	return 1;
+}
+
+In this case, the relay_buf_full() check is meaningless and the
+callback always returns 1, causing the buffer switch to occur
+unconditionally.  It's also meaningless for the client to use the
+relay_subbufs_consumed() function in this mode, as it's never
+consulted.
+
+The default subbuf_start() implementation, used if the client doesn't
+define any callbacks, or doesn't define the subbuf_start() callback,
+implements the simplest possible 'no-overwrite' mode, i.e. it does
+nothing but return 0.
+
+Header information can be reserved at the beginning of each sub-buffer
+by calling the subbuf_start_reserve() helper function from within the
+subbuf_start() callback.  This reserved area can be used to store
+whatever information the client wants.  In the example above, room is
+reserved in each sub-buffer to store the padding count for that
+sub-buffer.  This is filled in for the previous sub-buffer in the
+subbuf_start() implementation; the padding value for the previous
+sub-buffer is passed into the subbuf_start() callback along with a
+pointer to the previous sub-buffer, since the padding value isn't
+known until a sub-buffer is filled.  The subbuf_start() callback is
+also called for the first sub-buffer when the channel is opened, to
+give the client a chance to reserve space in it.  In this case the
+previous sub-buffer pointer passed into the callback will be NULL, so
+the client should check the value of the prev_subbuf pointer before
+writing into the previous sub-buffer.
+
+Writing to a channel
+--------------------
+
+Kernel clients write data into the current cpu's channel buffer using
+relay_write() or __relay_write().  relay_write() is the main logging
+function - it uses local_irq_save() to protect the buffer and should be
+used if you might be logging from interrupt context.  If you know
+you'll never be logging from interrupt context, you can use
+__relay_write(), which only disables preemption.  These functions
+don't return a value, so you can't determine whether or not they
+failed - the assumption is that you wouldn't want to check a return
+value in the fast logging path anyway, and that they'll always succeed
+unless the buffer is full and no-overwrite mode is being used, in
+which case you can detect a failed write in the subbuf_start()
+callback by calling the relay_buf_full() helper function.
+
+relay_reserve() is used to reserve a slot in a channel buffer which
+can be written to later.  This would typically be used in applications
+that need to write directly into a channel buffer without having to
+stage data in a temporary buffer beforehand.  Because the actual write
+may not happen immediately after the slot is reserved, applications
+using relay_reserve() can keep a count of the number of bytes actually
+written, either in space reserved in the sub-buffers themselves or as
+a separate array.  See the 'reserve' example in the relay-apps tarball
+at http://relayfs.sourceforge.net for an example of how this can be
+done.  Because the write is under control of the client and is
+separated from the reserve, relay_reserve() doesn't protect the buffer
+at all - it's up to the client to provide the appropriate
+synchronization when using relay_reserve().
+
+Closing a channel
+-----------------
+
+The client calls relay_close() when it's finished using the channel.
+The channel and its associated buffers are destroyed when there are no
+longer any references to any of the channel buffers.  relay_flush()
+forces a sub-buffer switch on all the channel buffers, and can be used
+to finalize and process the last sub-buffers before the channel is
+closed.
+
+Misc
+----
+
+Some applications may want to keep a channel around and re-use it
+rather than open and close a new channel for each use.  relay_reset()
+can be used for this purpose - it resets a channel to its initial
+state without reallocating channel buffer memory or destroying
+existing mappings.  It should however only be called when it's safe to
+do so, i.e. when the channel isn't currently being written to.
+
+Finally, there are a couple of utility callbacks that can be used for
+different purposes.  buf_mapped() is called whenever a channel buffer
+is mmapped from user space and buf_unmapped() is called when it's
+unmapped.  The client can use this notification to trigger actions
+within the kernel application, such as enabling/disabling logging to
+the channel.
+
+
+Resources
+=========
+
+For news, example code, mailing list, etc. see the relay interface homepage:
+
+    http://relayfs.sourceforge.net
+
+
+Credits
+=======
+
+The ideas and specs for the relay interface came about as a result of
+discussions on tracing involving the following:
+
+Michel Dagenais		<michel.dagenais@polymtl.ca>
+Richard Moore		<richardj_moore@uk.ibm.com>
+Bob Wisniewski		<bob@watson.ibm.com>
+Karim Yaghmour		<karim@opersys.com>
+Tom Zanussi		<zanussi@us.ibm.com>
+
+Also thanks to Hubertus Franke for a lot of useful suggestions and bug
+reports.
diff --git a/Documentation/filesystems/relayfs.txt b/Documentation/filesystems/relayfs.txt
deleted file mode 100644
index 5832377..0000000
--- a/Documentation/filesystems/relayfs.txt
+++ /dev/null
@@ -1,442 +0,0 @@
-
-relayfs - a high-speed data relay filesystem
-============================================
-
-relayfs is a filesystem designed to provide an efficient mechanism for
-tools and facilities to relay large and potentially sustained streams
-of data from kernel space to user space.
-
-The main abstraction of relayfs is the 'channel'.  A channel consists
-of a set of per-cpu kernel buffers each represented by a file in the
-relayfs filesystem.  Kernel clients write into a channel using
-efficient write functions which automatically log to the current cpu's
-channel buffer.  User space applications mmap() the per-cpu files and
-retrieve the data as it becomes available.
-
-The format of the data logged into the channel buffers is completely
-up to the relayfs client; relayfs does however provide hooks which
-allow clients to impose some structure on the buffer data.  Nor does
-relayfs implement any form of data filtering - this also is left to
-the client.  The purpose is to keep relayfs as simple as possible.
-
-This document provides an overview of the relayfs API.  The details of
-the function parameters are documented along with the functions in the
-filesystem code - please see that for details.
-
-Semantics
-=========
-
-Each relayfs channel has one buffer per CPU, each buffer has one or
-more sub-buffers. Messages are written to the first sub-buffer until
-it is too full to contain a new message, in which case it it is
-written to the next (if available).  Messages are never split across
-sub-buffers.  At this point, userspace can be notified so it empties
-the first sub-buffer, while the kernel continues writing to the next.
-
-When notified that a sub-buffer is full, the kernel knows how many
-bytes of it are padding i.e. unused.  Userspace can use this knowledge
-to copy only valid data.
-
-After copying it, userspace can notify the kernel that a sub-buffer
-has been consumed.
-
-relayfs can operate in a mode where it will overwrite data not yet
-collected by userspace, and not wait for it to consume it.
-
-relayfs itself does not provide for communication of such data between
-userspace and kernel, allowing the kernel side to remain simple and
-not impose a single interface on userspace. It does provide a set of
-examples and a separate helper though, described below.
-
-klog and relay-apps example code
-================================
-
-relayfs itself is ready to use, but to make things easier, a couple
-simple utility functions and a set of examples are provided.
-
-The relay-apps example tarball, available on the relayfs sourceforge
-site, contains a set of self-contained examples, each consisting of a
-pair of .c files containing boilerplate code for each of the user and
-kernel sides of a relayfs application; combined these two sets of
-boilerplate code provide glue to easily stream data to disk, without
-having to bother with mundane housekeeping chores.
-
-The 'klog debugging functions' patch (klog.patch in the relay-apps
-tarball) provides a couple of high-level logging functions to the
-kernel which allow writing formatted text or raw data to a channel,
-regardless of whether a channel to write into exists or not, or
-whether relayfs is compiled into the kernel or is configured as a
-module.  These functions allow you to put unconditional 'trace'
-statements anywhere in the kernel or kernel modules; only when there
-is a 'klog handler' registered will data actually be logged (see the
-klog and kleak examples for details).
-
-It is of course possible to use relayfs from scratch i.e. without
-using any of the relay-apps example code or klog, but you'll have to
-implement communication between userspace and kernel, allowing both to
-convey the state of buffers (full, empty, amount of padding).
-
-klog and the relay-apps examples can be found in the relay-apps
-tarball on http://relayfs.sourceforge.net
-
-
-The relayfs user space API
-==========================
-
-relayfs implements basic file operations for user space access to
-relayfs channel buffer data.  Here are the file operations that are
-available and some comments regarding their behavior:
-
-open()	 enables user to open an _existing_ buffer.
-
-mmap()	 results in channel buffer being mapped into the caller's
-	 memory space. Note that you can't do a partial mmap - you must
-	 map the entire file, which is NRBUF * SUBBUFSIZE.
-
-read()	 read the contents of a channel buffer.  The bytes read are
-	 'consumed' by the reader i.e. they won't be available again
-	 to subsequent reads.  If the channel is being used in
-	 no-overwrite mode (the default), it can be read at any time
-	 even if there's an active kernel writer.  If the channel is
-	 being used in overwrite mode and there are active channel
-	 writers, results may be unpredictable - users should make
-	 sure that all logging to the channel has ended before using
-	 read() with overwrite mode.
-
-poll()	 POLLIN/POLLRDNORM/POLLERR supported.  User applications are
-	 notified when sub-buffer boundaries are crossed.
-
-close() decrements the channel buffer's refcount.  When the refcount
-	reaches 0 i.e. when no process or kernel client has the buffer
-	open, the channel buffer is freed.
-
-
-In order for a user application to make use of relayfs files, the
-relayfs filesystem must be mounted.  For example,
-
-	mount -t relayfs relayfs /mnt/relay
-
-NOTE:	relayfs doesn't need to be mounted for kernel clients to create
-	or use channels - it only needs to be mounted when user space
-	applications need access to the buffer data.
-
-
-The relayfs kernel API
-======================
-
-Here's a summary of the API relayfs provides to in-kernel clients:
-
-
-  channel management functions:
-
-    relay_open(base_filename, parent, subbuf_size, n_subbufs,
-               callbacks)
-    relay_close(chan)
-    relay_flush(chan)
-    relay_reset(chan)
-    relayfs_create_dir(name, parent)
-    relayfs_remove_dir(dentry)
-    relayfs_create_file(name, parent, mode, fops, data)
-    relayfs_remove_file(dentry)
-
-  channel management typically called on instigation of userspace:
-
-    relay_subbufs_consumed(chan, cpu, subbufs_consumed)
-
-  write functions:
-
-    relay_write(chan, data, length)
-    __relay_write(chan, data, length)
-    relay_reserve(chan, length)
-
-  callbacks:
-
-    subbuf_start(buf, subbuf, prev_subbuf, prev_padding)
-    buf_mapped(buf, filp)
-    buf_unmapped(buf, filp)
-    create_buf_file(filename, parent, mode, buf, is_global)
-    remove_buf_file(dentry)
-
-  helper functions:
-
-    relay_buf_full(buf)
-    subbuf_start_reserve(buf, length)
-
-
-Creating a channel
-------------------
-
-relay_open() is used to create a channel, along with its per-cpu
-channel buffers.  Each channel buffer will have an associated file
-created for it in the relayfs filesystem, which can be opened and
-mmapped from user space if desired.  The files are named
-basename0...basenameN-1 where N is the number of online cpus, and by
-default will be created in the root of the filesystem.  If you want a
-directory structure to contain your relayfs files, you can create it
-with relayfs_create_dir() and pass the parent directory to
-relay_open().  Clients are responsible for cleaning up any directory
-structure they create when the channel is closed - use
-relayfs_remove_dir() for that.
-
-The total size of each per-cpu buffer is calculated by multiplying the
-number of sub-buffers by the sub-buffer size passed into relay_open().
-The idea behind sub-buffers is that they're basically an extension of
-double-buffering to N buffers, and they also allow applications to
-easily implement random-access-on-buffer-boundary schemes, which can
-be important for some high-volume applications.  The number and size
-of sub-buffers is completely dependent on the application and even for
-the same application, different conditions will warrant different
-values for these parameters at different times.  Typically, the right
-values to use are best decided after some experimentation; in general,
-though, it's safe to assume that having only 1 sub-buffer is a bad
-idea - you're guaranteed to either overwrite data or lose events
-depending on the channel mode being used.
-
-Channel 'modes'
----------------
-
-relayfs channels can be used in either of two modes - 'overwrite' or
-'no-overwrite'.  The mode is entirely determined by the implementation
-of the subbuf_start() callback, as described below.  In 'overwrite'
-mode, also known as 'flight recorder' mode, writes continuously cycle
-around the buffer and will never fail, but will unconditionally
-overwrite old data regardless of whether it's actually been consumed.
-In no-overwrite mode, writes will fail i.e. data will be lost, if the
-number of unconsumed sub-buffers equals the total number of
-sub-buffers in the channel.  It should be clear that if there is no
-consumer or if the consumer can't consume sub-buffers fast enought,
-data will be lost in either case; the only difference is whether data
-is lost from the beginning or the end of a buffer.
-
-As explained above, a relayfs channel is made of up one or more
-per-cpu channel buffers, each implemented as a circular buffer
-subdivided into one or more sub-buffers.  Messages are written into
-the current sub-buffer of the channel's current per-cpu buffer via the
-write functions described below.  Whenever a message can't fit into
-the current sub-buffer, because there's no room left for it, the
-client is notified via the subbuf_start() callback that a switch to a
-new sub-buffer is about to occur.  The client uses this callback to 1)
-initialize the next sub-buffer if appropriate 2) finalize the previous
-sub-buffer if appropriate and 3) return a boolean value indicating
-whether or not to actually go ahead with the sub-buffer switch.
-
-To implement 'no-overwrite' mode, the userspace client would provide
-an implementation of the subbuf_start() callback something like the
-following:
-
-static int subbuf_start(struct rchan_buf *buf,
-                        void *subbuf,
-			void *prev_subbuf,
-			unsigned int prev_padding)
-{
-	if (prev_subbuf)
-		*((unsigned *)prev_subbuf) = prev_padding;
-
-	if (relay_buf_full(buf))
-		return 0;
-
-	subbuf_start_reserve(buf, sizeof(unsigned int));
-
-	return 1;
-}
-
-If the current buffer is full i.e. all sub-buffers remain unconsumed,
-the callback returns 0 to indicate that the buffer switch should not
-occur yet i.e. until the consumer has had a chance to read the current
-set of ready sub-buffers.  For the relay_buf_full() function to make
-sense, the consumer is reponsible for notifying relayfs when
-sub-buffers have been consumed via relay_subbufs_consumed().  Any
-subsequent attempts to write into the buffer will again invoke the
-subbuf_start() callback with the same parameters; only when the
-consumer has consumed one or more of the ready sub-buffers will
-relay_buf_full() return 0, in which case the buffer switch can
-continue.
-
-The implementation of the subbuf_start() callback for 'overwrite' mode
-would be very similar:
-
-static int subbuf_start(struct rchan_buf *buf,
-                        void *subbuf,
-			void *prev_subbuf,
-			unsigned int prev_padding)
-{
-	if (prev_subbuf)
-		*((unsigned *)prev_subbuf) = prev_padding;
-
-	subbuf_start_reserve(buf, sizeof(unsigned int));
-
-	return 1;
-}
-
-In this case, the relay_buf_full() check is meaningless and the
-callback always returns 1, causing the buffer switch to occur
-unconditionally.  It's also meaningless for the client to use the
-relay_subbufs_consumed() function in this mode, as it's never
-consulted.
-
-The default subbuf_start() implementation, used if the client doesn't
-define any callbacks, or doesn't define the subbuf_start() callback,
-implements the simplest possible 'no-overwrite' mode i.e. it does
-nothing but return 0.
-
-Header information can be reserved at the beginning of each sub-buffer
-by calling the subbuf_start_reserve() helper function from within the
-subbuf_start() callback.  This reserved area can be used to store
-whatever information the client wants.  In the example above, room is
-reserved in each sub-buffer to store the padding count for that
-sub-buffer.  This is filled in for the previous sub-buffer in the
-subbuf_start() implementation; the padding value for the previous
-sub-buffer is passed into the subbuf_start() callback along with a
-pointer to the previous sub-buffer, since the padding value isn't
-known until a sub-buffer is filled.  The subbuf_start() callback is
-also called for the first sub-buffer when the channel is opened, to
-give the client a chance to reserve space in it.  In this case the
-previous sub-buffer pointer passed into the callback will be NULL, so
-the client should check the value of the prev_subbuf pointer before
-writing into the previous sub-buffer.
-
-Writing to a channel
---------------------
-
-kernel clients write data into the current cpu's channel buffer using
-relay_write() or __relay_write().  relay_write() is the main logging
-function - it uses local_irqsave() to protect the buffer and should be
-used if you might be logging from interrupt context.  If you know
-you'll never be logging from interrupt context, you can use
-__relay_write(), which only disables preemption.  These functions
-don't return a value, so you can't determine whether or not they
-failed - the assumption is that you wouldn't want to check a return
-value in the fast logging path anyway, and that they'll always succeed
-unless the buffer is full and no-overwrite mode is being used, in
-which case you can detect a failed write in the subbuf_start()
-callback by calling the relay_buf_full() helper function.
-
-relay_reserve() is used to reserve a slot in a channel buffer which
-can be written to later.  This would typically be used in applications
-that need to write directly into a channel buffer without having to
-stage data in a temporary buffer beforehand.  Because the actual write
-may not happen immediately after the slot is reserved, applications
-using relay_reserve() can keep a count of the number of bytes actually
-written, either in space reserved in the sub-buffers themselves or as
-a separate array.  See the 'reserve' example in the relay-apps tarball
-at http://relayfs.sourceforge.net for an example of how this can be
-done.  Because the write is under control of the client and is
-separated from the reserve, relay_reserve() doesn't protect the buffer
-at all - it's up to the client to provide the appropriate
-synchronization when using relay_reserve().
-
-Closing a channel
------------------
-
-The client calls relay_close() when it's finished using the channel.
-The channel and its associated buffers are destroyed when there are no
-longer any references to any of the channel buffers.  relay_flush()
-forces a sub-buffer switch on all the channel buffers, and can be used
-to finalize and process the last sub-buffers before the channel is
-closed.
-
-Creating non-relay files
-------------------------
-
-relay_open() automatically creates files in the relayfs filesystem to
-represent the per-cpu kernel buffers; it's often useful for
-applications to be able to create their own files alongside the relay
-files in the relayfs filesystem as well e.g. 'control' files much like
-those created in /proc or debugfs for similar purposes, used to
-communicate control information between the kernel and user sides of a
-relayfs application.  For this purpose the relayfs_create_file() and
-relayfs_remove_file() API functions exist.  For relayfs_create_file(),
-the caller passes in a set of user-defined file operations to be used
-for the file and an optional void * to a user-specified data item,
-which will be accessible via inode->u.generic_ip (see the relay-apps
-tarball for examples).  The file_operations are a required parameter
-to relayfs_create_file() and thus the semantics of these files are
-completely defined by the caller.
-
-See the relay-apps tarball at http://relayfs.sourceforge.net for
-examples of how these non-relay files are meant to be used.
-
-Creating relay files in other filesystems
------------------------------------------
-
-By default of course, relay_open() creates relay files in the relayfs
-filesystem.  Because relay_file_operations is exported, however, it's
-also possible to create and use relay files in other pseudo-filesytems
-such as debugfs.
-
-For this purpose, two callback functions are provided,
-create_buf_file() and remove_buf_file().  create_buf_file() is called
-once for each per-cpu buffer from relay_open() to allow the client to
-create a file to be used to represent the corresponding buffer; if
-this callback is not defined, the default implementation will create
-and return a file in the relayfs filesystem to represent the buffer.
-The callback should return the dentry of the file created to represent
-the relay buffer.  Note that the parent directory passed to
-relay_open() (and passed along to the callback), if specified, must
-exist in the same filesystem the new relay file is created in.  If
-create_buf_file() is defined, remove_buf_file() must also be defined;
-it's responsible for deleting the file(s) created in create_buf_file()
-and is called during relay_close().
-
-The create_buf_file() implementation can also be defined in such a way
-as to allow the creation of a single 'global' buffer instead of the
-default per-cpu set.  This can be useful for applications interested
-mainly in seeing the relative ordering of system-wide events without
-the need to bother with saving explicit timestamps for the purpose of
-merging/sorting per-cpu files in a postprocessing step.
-
-To have relay_open() create a global buffer, the create_buf_file()
-implementation should set the value of the is_global outparam to a
-non-zero value in addition to creating the file that will be used to
-represent the single buffer.  In the case of a global buffer,
-create_buf_file() and remove_buf_file() will be called only once.  The
-normal channel-writing functions e.g. relay_write() can still be used
-- writes from any cpu will transparently end up in the global buffer -
-but since it is a global buffer, callers should make sure they use the
-proper locking for such a buffer, either by wrapping writes in a
-spinlock, or by copying a write function from relayfs_fs.h and
-creating a local version that internally does the proper locking.
-
-See the 'exported-relayfile' examples in the relay-apps tarball for
-examples of creating and using relay files in debugfs.
-
-Misc
-----
-
-Some applications may want to keep a channel around and re-use it
-rather than open and close a new channel for each use.  relay_reset()
-can be used for this purpose - it resets a channel to its initial
-state without reallocating channel buffer memory or destroying
-existing mappings.  It should however only be called when it's safe to
-do so i.e. when the channel isn't currently being written to.
-
-Finally, there are a couple of utility callbacks that can be used for
-different purposes.  buf_mapped() is called whenever a channel buffer
-is mmapped from user space and buf_unmapped() is called when it's
-unmapped.  The client can use this notification to trigger actions
-within the kernel application, such as enabling/disabling logging to
-the channel.
-
-
-Resources
-=========
-
-For news, example code, mailing list, etc. see the relayfs homepage:
-
-    http://relayfs.sourceforge.net
-
-
-Credits
-=======
-
-The ideas and specs for relayfs came about as a result of discussions
-on tracing involving the following:
-
-Michel Dagenais		<michel.dagenais@polymtl.ca>
-Richard Moore		<richardj_moore@uk.ibm.com>
-Bob Wisniewski		<bob@watson.ibm.com>
-Karim Yaghmour		<karim@opersys.com>
-Tom Zanussi		<zanussi@us.ibm.com>
-
-Also thanks to Hubertus Franke for a lot of useful suggestions and bug
-reports.
-- 
cgit v0.10.2


From 45f17e0c2ae05c133a348452690de0e5fa863293 Mon Sep 17 00:00:00 2001
From: Masoud Asgharifard Sharbiani <masouds@google.com>
Date: Sun, 27 Aug 2006 01:23:48 -0700
Subject: [PATCH] eventpoll.c compile fix

Fix two compile failures in eventpoll.c code which would happen if
DEBUG_EPOLL is bigger than zero.

Signed-off-by: Masoud Sharbiani <masouds@google.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 19ffb04..3a35674 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1168,7 +1168,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
 eexit_1:
 
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n",
-		     current, ep, epi->file, error));
+		     current, ep, epi->ffd.file, error));
 
 	return error;
 }
@@ -1236,7 +1236,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 	struct eventpoll *ep = epi->ep;
 
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
-		     current, epi->file, epi, ep));
+		     current, epi->ffd.file, epi, ep));
 
 	write_lock_irqsave(&ep->lock, flags);
 
-- 
cgit v0.10.2


From 84692195969b83f0ba57dc33ecf73e6c124dd186 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Sun, 27 Aug 2006 01:23:49 -0700
Subject: [PATCH] md: avoid backward event updates in md superblock when
 degraded.

If we
  - shut down a clean array,
  - restart with one (or more) drive(s) missing
  - make some changes
  - pause, so that the array gets marked 'clean',
the event count on the superblock of included drives
will be the same as that of the removed drives.
So adding the removed drive back in will cause it
to be included with no resync.

To avoid this, we only update the eventcount backwards when the array
is not degraded.  In this case there can (should) be no non-connected
drives that we can get confused with, and this is the particular case
where updating-backwards is valuable.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index b6d1602..8dbab2e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1597,6 +1597,19 @@ void md_update_sb(mddev_t * mddev)
 
 repeat:
 	spin_lock_irq(&mddev->write_lock);
+
+	if (mddev->degraded && mddev->sb_dirty == 3)
+		/* If the array is degraded, then skipping spares is both
+		 * dangerous and fairly pointless.
+		 * Dangerous because a device that was removed from the array
+		 * might have a event_count that still looks up-to-date,
+		 * so it can be re-added without a resync.
+		 * Pointless because if there are any spares to skip,
+		 * then a recovery will happen and soon that array won't
+		 * be degraded any more and the spare can go back to sleep then.
+		 */
+		mddev->sb_dirty = 1;
+
 	sync_req = mddev->in_sync;
 	mddev->utime = get_seconds();
 	if (mddev->sb_dirty == 3)
-- 
cgit v0.10.2


From 6394cca54894f6a9bcf927ab78d28985944298ff Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Sun, 27 Aug 2006 01:23:50 -0700
Subject: [PATCH] md: fix recent breakage of md/raid1 array checking

A recent patch broke the ability to do a user-request check of a raid1.
This patch fixes the breakage and also moves a comment that was dislocated
by the same patch.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 1efe22a..87bfe9e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1625,15 +1625,16 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		return 0;
 	}
 
-	/* before building a request, check if we can skip these blocks..
-	 * This call the bitmap_start_sync doesn't actually record anything
-	 */
 	if (mddev->bitmap == NULL &&
 	    mddev->recovery_cp == MaxSector &&
+	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
 	    conf->fullsync == 0) {
 		*skipped = 1;
 		return max_sector - sector_nr;
 	}
+	/* before building a request, check if we can skip these blocks..
+	 * This call the bitmap_start_sync doesn't actually record anything
+	 */
 	if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
 	    !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
 		/* We can skip this block, and probably several more */
-- 
cgit v0.10.2


From 4c4d50f7b39cc58f1064b93a61ad617451ae41df Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Sun, 27 Aug 2006 01:23:51 -0700
Subject: [PATCH] cpuset: top_cpuset tracks hotplug changes to cpu_online_map

Change the list of cpus allowed to tasks in the top (root) cpuset to
dynamically track what cpus are online, using a CPU hotplug notifier.  Make
this top cpus file read-only.

On systems that have cpusets configured in their kernel, but that aren't
actively using cpusets (for some distros, this covers the majority of
systems) all tasks end up in the top cpuset.

If that system does support CPU hotplug, then these tasks cannot make use
of CPUs that are added after system boot, because the CPUs are not allowed
in the top cpuset.  This is a surprising regression over earlier kernels
that didn't have cpusets enabled.

In order to keep the behaviour of cpusets consistent between systems
actively making use of them and systems not using them, this patch changes
the behaviour of the 'cpus' file in the top (root) cpuset, making it read
only, and making it automatically track the value of cpu_online_map.  Thus
tasks in the top cpuset will have automatic use of hot plugged CPUs allowed
by their cpuset.

Thanks to Anton Blanchard and Nathan Lynch for reporting this problem,
driving the fix, and earlier versions of this patch.

Signed-off-by: Paul Jackson <pj@sgi.com>
Cc: Nathan Lynch <ntl@pobox.com>
Cc: Anton Blanchard <anton@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index 159e2a0..76b4429 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -217,6 +217,12 @@ exclusive cpuset.  Also, the use of a Linux virtual file system (vfs)
 to represent the cpuset hierarchy provides for a familiar permission
 and name space for cpusets, with a minimum of additional kernel code.
 
+The cpus file in the root (top_cpuset) cpuset is read-only.
+It automatically tracks the value of cpu_online_map, using a CPU
+hotplug notifier.  If and when memory nodes can be hotplugged,
+we expect to make the mems file in the root cpuset read-only
+as well, and have it track the value of node_online_map.
+
 
 1.4 What are exclusive cpusets ?
 --------------------------------
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1a649f2..f1dda98 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -816,6 +816,10 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	struct cpuset trialcs;
 	int retval, cpus_unchanged;
 
+	/* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
+	if (cs == &top_cpuset)
+		return -EACCES;
+
 	trialcs = *cs;
 	retval = cpulist_parse(buf, trialcs.cpus_allowed);
 	if (retval < 0)
@@ -2033,6 +2037,33 @@ out:
 	return err;
 }
 
+/*
+ * The top_cpuset tracks what CPUs and Memory Nodes are online,
+ * period.  This is necessary in order to make cpusets transparent
+ * (of no affect) on systems that are actively using CPU hotplug
+ * but making no active use of cpusets.
+ *
+ * This handles CPU hotplug (cpuhp) events.  If someday Memory
+ * Nodes can be hotplugged (dynamically changing node_online_map)
+ * then we should handle that too, perhaps in a similar way.
+ */
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int cpuset_handle_cpuhp(struct notifier_block *nb,
+				unsigned long phase, void *cpu)
+{
+	mutex_lock(&manage_mutex);
+	mutex_lock(&callback_mutex);
+
+	top_cpuset.cpus_allowed = cpu_online_map;
+
+	mutex_unlock(&callback_mutex);
+	mutex_unlock(&manage_mutex);
+
+	return 0;
+}
+#endif
+
 /**
  * cpuset_init_smp - initialize cpus_allowed
  *
@@ -2043,6 +2074,8 @@ void __init cpuset_init_smp(void)
 {
 	top_cpuset.cpus_allowed = cpu_online_map;
 	top_cpuset.mems_allowed = node_online_map;
+
+	hotcpu_notifier(cpuset_handle_cpuhp, 0);
 }
 
 /**
-- 
cgit v0.10.2


From ea817398e68dfa25612229fda7fc74580cf915fb Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Sun, 27 Aug 2006 01:23:52 -0700
Subject: [PATCH] Manage jbd allocations from its own slabs

JBD currently allocates commit and frozen buffers from slabs.  With
CONFIG_SLAB_DEBUG, it's possible for an allocation to cross the page
boundary causing IO problems.

https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=200127

So, instead of allocating these from regular slabs - manage allocation from
its own slabs and disable slab debug for these slabs.

[akpm@osdl.org: cleanups]
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 0971814..42da607 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -261,7 +261,7 @@ void journal_commit_transaction(journal_t *journal)
 			struct buffer_head *bh = jh2bh(jh);
 
 			jbd_lock_bh_state(bh);
-			kfree(jh->b_committed_data);
+			jbd_slab_free(jh->b_committed_data, bh->b_size);
 			jh->b_committed_data = NULL;
 			jbd_unlock_bh_state(bh);
 		}
@@ -745,14 +745,14 @@ restart_loop:
 		 * Otherwise, we can just throw away the frozen data now.
 		 */
 		if (jh->b_committed_data) {
-			kfree(jh->b_committed_data);
+			jbd_slab_free(jh->b_committed_data, bh->b_size);
 			jh->b_committed_data = NULL;
 			if (jh->b_frozen_data) {
 				jh->b_committed_data = jh->b_frozen_data;
 				jh->b_frozen_data = NULL;
 			}
 		} else if (jh->b_frozen_data) {
-			kfree(jh->b_frozen_data);
+			jbd_slab_free(jh->b_frozen_data, bh->b_size);
 			jh->b_frozen_data = NULL;
 		}
 
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 8c9b28d..f66724c 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -84,6 +84,7 @@ EXPORT_SYMBOL(journal_force_commit);
 
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
+static int journal_create_jbd_slab(size_t slab_size);
 
 /*
  * Helper function used to manage commit timeouts
@@ -328,10 +329,10 @@ repeat:
 		char *tmp;
 
 		jbd_unlock_bh_state(bh_in);
-		tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS);
+		tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS);
 		jbd_lock_bh_state(bh_in);
 		if (jh_in->b_frozen_data) {
-			kfree(tmp);
+			jbd_slab_free(tmp, bh_in->b_size);
 			goto repeat;
 		}
 
@@ -1069,17 +1070,17 @@ static int load_superblock(journal_t *journal)
 int journal_load(journal_t *journal)
 {
 	int err;
+	journal_superblock_t *sb;
 
 	err = load_superblock(journal);
 	if (err)
 		return err;
 
+	sb = journal->j_superblock;
 	/* If this is a V2 superblock, then we have to check the
 	 * features flags on it. */
 
 	if (journal->j_format_version >= 2) {
-		journal_superblock_t *sb = journal->j_superblock;
-
 		if ((sb->s_feature_ro_compat &
 		     ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) ||
 		    (sb->s_feature_incompat &
@@ -1090,6 +1091,13 @@ int journal_load(journal_t *journal)
 		}
 	}
 
+	/*
+	 * Create a slab for this blocksize
+	 */
+	err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize));
+	if (err)
+		return err;
+
 	/* Let the recovery code check whether it needs to recover any
 	 * data from the journal. */
 	if (journal_recover(journal))
@@ -1612,6 +1620,77 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 }
 
 /*
+ * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
+ * and allocate frozen and commit buffers from these slabs.
+ *
+ * Reason for doing this is to avoid, SLAB_DEBUG - since it could
+ * cause bh to cross page boundary.
+ */
+
+#define JBD_MAX_SLABS 5
+#define JBD_SLAB_INDEX(size)  (size >> 11)
+
+static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+static const char *jbd_slab_names[JBD_MAX_SLABS] = {
+	"jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
+};
+
+static void journal_destroy_jbd_slabs(void)
+{
+	int i;
+
+	for (i = 0; i < JBD_MAX_SLABS; i++) {
+		if (jbd_slab[i])
+			kmem_cache_destroy(jbd_slab[i]);
+		jbd_slab[i] = NULL;
+	}
+}
+
+static int journal_create_jbd_slab(size_t slab_size)
+{
+	int i = JBD_SLAB_INDEX(slab_size);
+
+	BUG_ON(i >= JBD_MAX_SLABS);
+
+	/*
+	 * Check if we already have a slab created for this size
+	 */
+	if (jbd_slab[i])
+		return 0;
+
+	/*
+	 * Create a slab and force alignment to be same as slabsize -
+	 * this will make sure that allocations won't cross the page
+	 * boundary.
+	 */
+	jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
+				slab_size, slab_size, 0, NULL, NULL);
+	if (!jbd_slab[i]) {
+		printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void * jbd_slab_alloc(size_t size, gfp_t flags)
+{
+	int idx;
+
+	idx = JBD_SLAB_INDEX(size);
+	BUG_ON(jbd_slab[idx] == NULL);
+	return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
+}
+
+void jbd_slab_free(void *ptr,  size_t size)
+{
+	int idx;
+
+	idx = JBD_SLAB_INDEX(size);
+	BUG_ON(jbd_slab[idx] == NULL);
+	kmem_cache_free(jbd_slab[idx], ptr);
+}
+
+/*
  * Journal_head storage management
  */
 static kmem_cache_t *journal_head_cache;
@@ -1799,13 +1878,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
 				printk(KERN_WARNING "%s: freeing "
 						"b_frozen_data\n",
 						__FUNCTION__);
-				kfree(jh->b_frozen_data);
+				jbd_slab_free(jh->b_frozen_data, bh->b_size);
 			}
 			if (jh->b_committed_data) {
 				printk(KERN_WARNING "%s: freeing "
 						"b_committed_data\n",
 						__FUNCTION__);
-				kfree(jh->b_committed_data);
+				jbd_slab_free(jh->b_committed_data, bh->b_size);
 			}
 			bh->b_private = NULL;
 			jh->b_bh = NULL;	/* debug, really */
@@ -1961,6 +2040,7 @@ static void journal_destroy_caches(void)
 	journal_destroy_revoke_caches();
 	journal_destroy_journal_head_cache();
 	journal_destroy_handle_cache();
+	journal_destroy_jbd_slabs();
 }
 
 static int __init journal_init(void)
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 508b2ea..de2e4cb 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -666,8 +666,9 @@ repeat:
 			if (!frozen_buffer) {
 				JBUFFER_TRACE(jh, "allocate memory for buffer");
 				jbd_unlock_bh_state(bh);
-				frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size,
-							    GFP_NOFS);
+				frozen_buffer =
+					jbd_slab_alloc(jh2bh(jh)->b_size,
+							 GFP_NOFS);
 				if (!frozen_buffer) {
 					printk(KERN_EMERG
 					       "%s: OOM for frozen_buffer\n",
@@ -879,7 +880,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 
 repeat:
 	if (!jh->b_committed_data) {
-		committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS);
+		committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
 		if (!committed_data) {
 			printk(KERN_EMERG "%s: No memory for committed data\n",
 				__FUNCTION__);
@@ -906,7 +907,7 @@ repeat:
 out:
 	journal_put_journal_head(jh);
 	if (unlikely(committed_data))
-		kfree(committed_data);
+		jbd_slab_free(committed_data, bh->b_size);
 	return err;
 }
 
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 20eb344..a04c154 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -72,6 +72,9 @@ extern int journal_enable_debug;
 #endif
 
 extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry);
+extern void * jbd_slab_alloc(size_t size, gfp_t flags);
+extern void jbd_slab_free(void *ptr, size_t size);
+
 #define jbd_kmalloc(size, flags) \
 	__jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
 #define jbd_rep_kmalloc(size, flags) \
-- 
cgit v0.10.2


From 36920e069a87c6fccffbc0ec5e74985d94d3bcc4 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Sun, 27 Aug 2006 01:23:52 -0700
Subject: [PATCH] register_one_node() compile fix

register_one_node() should also be defined when CONFIG_NUMA=n.
This fixes the following link error:

  CC	  init/version.o
  LD	  init/built-in.o
  LD	  .tmp_vmlinux1
  mm/built-in.o: In function `add_memory': undefined reference to `register_one_node'

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/node.h b/include/linux/node.h
index 81dcec8..bc001bc 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -30,12 +30,20 @@ extern struct node node_devices[];
 
 extern int register_node(struct node *, int, struct node *);
 extern void unregister_node(struct node *node);
+#ifdef CONFIG_NUMA
 extern int register_one_node(int nid);
 extern void unregister_one_node(int nid);
-#ifdef CONFIG_NUMA
 extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
 extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
 #else
+static inline int register_one_node(int nid)
+{
+	return 0;
+}
+static inline int unregister_one_node(int nid)
+{
+	return 0;
+}
 static inline int register_cpu_under_node(unsigned int cpu, unsigned int nid)
 {
 	return 0;
-- 
cgit v0.10.2


From 0d673a5a4775d3dc565b6668ed75fd2db2ede624 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sun, 27 Aug 2006 01:23:54 -0700
Subject: [PATCH] cpuset: oom panic fix

cpuset_excl_nodes_overlap always returns 0 if current is exiting.  This caused
customer's systems to panic in the OOM killer when processes were having
trouble getting memory for the final put_user in mm_release, even though
there were lots of processes to kill.

Change to returning 1 in this case.  This achieves parity with !CONFIG_CPUSETS
case, and was observed to fix the problem.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f1dda98..4ea6f0d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2420,7 +2420,7 @@ EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
 int cpuset_excl_nodes_overlap(const struct task_struct *p)
 {
 	const struct cpuset *cs1, *cs2;	/* my and p's cpuset ancestors */
-	int overlap = 0;		/* do cpusets overlap? */
+	int overlap = 1;		/* do cpusets overlap? */
 
 	task_lock(current);
 	if (current->flags & PF_EXITING) {
-- 
cgit v0.10.2


From 4e54bdaa9cf55d7e74409ea288b705c144b2627e Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Sun, 27 Aug 2006 01:23:55 -0700
Subject: [PATCH] CONFIG_ACPI_SRAT NUMA build fix

  In file included from include/asm/mmzone.h:18,
                   from include/linux/mmzone.h:439,
  <snip>
  include/asm/srat.h:31:2: error: #error CONFIG_ACPI_SRAT not defined, and srat.h header has been included
  make[1]: *** [arch/i386/kernel/asm-offsets.s] Error 1

This can happen with CONFIG_NUMA && !CONFIG_ACPI && !CONFIG_X86_NUMAQ

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h
index e33e9f9..22cb07c 100644
--- a/include/asm-i386/mmzone.h
+++ b/include/asm-i386/mmzone.h
@@ -14,7 +14,7 @@ extern struct pglist_data *node_data[];
 
 #ifdef CONFIG_X86_NUMAQ
 	#include <asm/numaq.h>
-#else	/* summit or generic arch */
+#elif defined(CONFIG_ACPI_SRAT)/* summit or generic arch */
 	#include <asm/srat.h>
 #endif
 
-- 
cgit v0.10.2


From 36e8e5783297fbb83bdebe7e245ef659958f23cb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 27 Aug 2006 01:23:56 -0700
Subject: [PATCH] lockdep: annotate idescsi_pc_intr()

idescsi_pc_intr() uses local_irq_enable() in IRQ context: annotate it.

(this has no effect on kernels with lockdep disabled.  On kernels with lockdep
enabled this means that we won't actually disable interrupts, and the warning
message will go away as well.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index f7b5d73..94d1de5 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -517,7 +517,7 @@ static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive)
 		/* No more interrupts */
 		if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
 			printk (KERN_INFO "Packet command completed, %d bytes transferred\n", pc->actually_transferred);
-		local_irq_enable();
+		local_irq_enable_in_hardirq();
 		if (status.b.check)
 			rq->errors++;
 		idescsi_end_request (drive, 1, 0);
-- 
cgit v0.10.2


From 4df46240a1312161e3c794f6ace50ef7eb5ff3d7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 27 Aug 2006 01:23:56 -0700
Subject: [PATCH] lockdep: annotate reiserfs

reiserfs seems to have another locking level layer for the i_mutex due to the
xattrs-are-a-directory thing.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 39fedaa..d935fb9 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -424,7 +424,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf)
 	int res = -ENOTDIR;
 	if (!file->f_op || !file->f_op->readdir)
 		goto out;
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR);
 //        down(&inode->i_zombie);
 	res = -ENOENT;
 	if (!IS_DEADDIR(inode)) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2561020..555bc19 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -570,13 +570,14 @@ struct inode {
  * 3: quota file
  *
  * The locking order between these classes is
- * parent -> child -> normal -> quota
+ * parent -> child -> normal -> xattr -> quota
  */
 enum inode_i_mutex_lock_class
 {
 	I_MUTEX_NORMAL,
 	I_MUTEX_PARENT,
 	I_MUTEX_CHILD,
+	I_MUTEX_XATTR,
 	I_MUTEX_QUOTA
 };
 
-- 
cgit v0.10.2


From 513627d7fec6fcb7b3d56ce355cb4d192c76b530 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Sun, 27 Aug 2006 01:23:57 -0700
Subject: [PATCH] fix up lockdep trace in fs/exec.c

This fixes the locking error noticed by lockdep:

  =============================================
  [ INFO: possible recursive locking detected ]
  ---------------------------------------------
  init/1 is trying to acquire lock:
   (&sighand->siglock){....}, at: [<c047a78a>] flush_old_exec+0x3ae/0x859

  but task is already holding lock:
   (&sighand->siglock){....}, at: [<c047a77a>] flush_old_exec+0x39e/0x859

  other info that might help us debug this:
  2 locks held by init/1:
   #0:  (tasklist_lock){..--}, at: [<c047a76a>] flush_old_exec+0x38e/0x859
   #1:  (&sighand->siglock){....}, at: [<c047a77a>] flush_old_exec+0x39e/0x859

  stack backtrace:
   [<c04051e1>] show_trace_log_lvl+0x54/0xfd
   [<c040579d>] show_trace+0xd/0x10
   [<c04058b6>] dump_stack+0x19/0x1b
   [<c043b33a>] __lock_acquire+0x773/0x997
   [<c043bacf>] lock_acquire+0x4b/0x6c
   [<c060630b>] _spin_lock+0x19/0x28
   [<c047a78a>] flush_old_exec+0x3ae/0x859
   [<c0498053>] load_elf_binary+0x4aa/0x1628
   [<c0479cab>] search_binary_handler+0xa7/0x24e
   [<c047b577>] do_execve+0x15b/0x1f9
   [<c04022b4>] sys_execve+0x29/0x4d
   [<c0403faf>] syscall_call+0x7/0xb

Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/exec.c b/fs/exec.c
index f7aabfe..54135df 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -751,7 +751,7 @@ no_thread_group:
 
 		write_lock_irq(&tasklist_lock);
 		spin_lock(&oldsighand->siglock);
-		spin_lock(&newsighand->siglock);
+		spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING);
 
 		rcu_assign_pointer(current->sighand, newsighand);
 		recalc_sigpending();
-- 
cgit v0.10.2


From f5ef68da5fda5e095b585ea5ecdd42af3c8695f7 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 27 Aug 2006 01:23:58 -0700
Subject: [PATCH] /proc/meminfo: don't put spaces in names

None of the other /proc/meminfo lines have a space in the identifier.  This
post-2.6.17 addition has the potential to break existing parsers, so use an
underscore instead (like Committed_AS).

Cc: Christoph Lameter <clameter@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/base/node.c b/drivers/base/node.c
index d7de1753..e9b0957 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -64,7 +64,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf)
 		       "Node %d Mapped:       %8lu kB\n"
 		       "Node %d AnonPages:    %8lu kB\n"
 		       "Node %d PageTables:   %8lu kB\n"
-		       "Node %d NFS Unstable: %8lu kB\n"
+		       "Node %d NFS_Unstable: %8lu kB\n"
 		       "Node %d Bounce:       %8lu kB\n"
 		       "Node %d Slab:         %8lu kB\n",
 		       nid, K(i.totalram),
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 9f2cfc3..9421562 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -169,7 +169,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 		"Mapped:       %8lu kB\n"
 		"Slab:         %8lu kB\n"
 		"PageTables:   %8lu kB\n"
-		"NFS Unstable: %8lu kB\n"
+		"NFS_Unstable: %8lu kB\n"
 		"Bounce:       %8lu kB\n"
 		"CommitLimit:  %8lu kB\n"
 		"Committed_AS: %8lu kB\n"
-- 
cgit v0.10.2


From 38e716aa016dfd8466c11500c56ab7773344fdf7 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Sun, 27 Aug 2006 01:24:00 -0700
Subject: [PATCH] x86: NUMAQ Kconfig fix

On i386, NUMA can only be selected when the system is X86_NUMAQ or uses ACPI.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index f71fb4a..b2751ea 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -142,6 +142,7 @@ config X86_SUMMIT
 	  In particular, it is needed for the x440.
 
 	  If you don't have one of these computers, you should say N here.
+	  If you want to build a NUMA kernel, you must select ACPI.
 
 config X86_BIGSMP
 	bool "Support for other sub-arch SMP systems with more than 8 CPUs"
@@ -169,6 +170,7 @@ config X86_GENERICARCH
        help
           This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
 	  It is intended for a generic binary kernel.
+	  If you want a NUMA kernel, select ACPI.   We need SRAT for NUMA.
 
 config X86_ES7000
 	bool "Support for Unisys ES7000 IA32 series"
@@ -542,7 +544,7 @@ config X86_PAE
 # Common NUMA Features
 config NUMA
 	bool "Numa Memory Allocation and Scheduler Support"
-	depends on SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI))
+	depends on SMP && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI)
 	default n if X86_PC
 	default y if (X86_NUMAQ || X86_SUMMIT)
 
-- 
cgit v0.10.2


From 9c275a8391d96b49fa135d1e4073d4798b7c6445 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sun, 27 Aug 2006 01:24:00 -0700
Subject: [PATCH] cdrom/gscd: fix printk format warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix printk format warning:
drivers/cdrom/gscd.c:269: warning: format ‘%lu’ expects type ‘long unsigned int’, but argument 2 has type ‘unsigned int’

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/cdrom/gscd.c b/drivers/cdrom/gscd.c
index b6ee50a..fa70824 100644
--- a/drivers/cdrom/gscd.c
+++ b/drivers/cdrom/gscd.c
@@ -266,7 +266,7 @@ repeat:
 		goto out;
 
 	if (req->cmd != READ) {
-		printk("GSCD: bad cmd %lu\n", rq_data_dir(req));
+		printk("GSCD: bad cmd %u\n", rq_data_dir(req));
 		end_request(req, 0);
 		goto repeat;
 	}
-- 
cgit v0.10.2


From af9b897ee639d96b2bd29b65b50cd0a1f2b6d6c9 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Sun, 27 Aug 2006 01:24:01 -0700
Subject: [PATCH] tty layer comment the locking assumptions and functions
 somewhat

Doesn't fix them but does show up some interesting areas that need review
and fixing.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index d6e4eaa..2cef982 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -153,6 +153,15 @@ int tty_ioctl(struct inode * inode, struct file * file,
 static int tty_fasync(int fd, struct file * filp, int on);
 static void release_mem(struct tty_struct *tty, int idx);
 
+/**
+ *	alloc_tty_struct	-	allocate a tty object
+ *
+ *	Return a new empty tty structure. The data fields have not
+ *	been initialized in any way but has been zeroed
+ *
+ *	Locking: none
+ *	FIXME: use kzalloc
+ */
 
 static struct tty_struct *alloc_tty_struct(void)
 {
@@ -166,6 +175,15 @@ static struct tty_struct *alloc_tty_struct(void)
 
 static void tty_buffer_free_all(struct tty_struct *);
 
+/**
+ *	free_tty_struct		-	free a disused tty
+ *	@tty: tty struct to free
+ *
+ *	Free the write buffers, tty queue and tty memory itself.
+ *
+ *	Locking: none. Must be called after tty is definitely unused
+ */
+
 static inline void free_tty_struct(struct tty_struct *tty)
 {
 	kfree(tty->write_buf);
@@ -175,6 +193,17 @@ static inline void free_tty_struct(struct tty_struct *tty)
 
 #define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base)
 
+/**
+ *	tty_name	-	return tty naming
+ *	@tty: tty structure
+ *	@buf: buffer for output
+ *
+ *	Convert a tty structure into a name. The name reflects the kernel
+ *	naming policy and if udev is in use may not reflect user space
+ *
+ *	Locking: none
+ */
+
 char *tty_name(struct tty_struct *tty, char *buf)
 {
 	if (!tty) /* Hmm.  NULL pointer.  That's fun. */
@@ -235,6 +264,17 @@ static int check_tty_count(struct tty_struct *tty, const char *routine)
  * Tty buffer allocation management
  */
 
+
+/**
+ *	tty_buffer_free_all		-	free buffers used by a tty
+ *	@tty: tty to free from
+ *
+ *	Remove all the buffers pending on a tty whether queued with data
+ *	or in the free ring. Must be called when the tty is no longer in use
+ *
+ *	Locking: none
+ */
+
 static void tty_buffer_free_all(struct tty_struct *tty)
 {
 	struct tty_buffer *thead;
@@ -347,6 +387,18 @@ int tty_buffer_request_room(struct tty_struct *tty, size_t size)
 }
 EXPORT_SYMBOL_GPL(tty_buffer_request_room);
 
+/**
+ *	tty_insert_flip_string	-	Add characters to the tty buffer
+ *	@tty: tty structure
+ *	@chars: characters
+ *	@size: size
+ *
+ *	Queue a series of bytes to the tty buffering. All the characters
+ *	passed are marked as without error. Returns the number added.
+ *
+ *	Locking: Called functions may take tty->buf.lock
+ */
+
 int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars,
 				size_t size)
 {
@@ -370,6 +422,20 @@ int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars,
 }
 EXPORT_SYMBOL(tty_insert_flip_string);
 
+/**
+ *	tty_insert_flip_string_flags	-	Add characters to the tty buffer
+ *	@tty: tty structure
+ *	@chars: characters
+ *	@flags: flag bytes
+ *	@size: size
+ *
+ *	Queue a series of bytes to the tty buffering. For each character
+ *	the flags array indicates the status of the character. Returns the
+ *	number added.
+ *
+ *	Locking: Called functions may take tty->buf.lock
+ */
+
 int tty_insert_flip_string_flags(struct tty_struct *tty,
 		const unsigned char *chars, const char *flags, size_t size)
 {
@@ -394,6 +460,17 @@ int tty_insert_flip_string_flags(struct tty_struct *tty,
 }
 EXPORT_SYMBOL(tty_insert_flip_string_flags);
 
+/**
+ *	tty_schedule_flip	-	push characters to ldisc
+ *	@tty: tty to push from
+ *
+ *	Takes any pending buffers and transfers their ownership to the
+ *	ldisc side of the queue. It then schedules those characters for
+ *	processing by the line discipline.
+ *
+ *	Locking: Takes tty->buf.lock
+ */
+
 void tty_schedule_flip(struct tty_struct *tty)
 {
 	unsigned long flags;
@@ -405,12 +482,19 @@ void tty_schedule_flip(struct tty_struct *tty)
 }
 EXPORT_SYMBOL(tty_schedule_flip);
 
-/*
+/**
+ *	tty_prepare_flip_string		-	make room for characters
+ *	@tty: tty
+ *	@chars: return pointer for character write area
+ *	@size: desired size
+ *
  *	Prepare a block of space in the buffer for data. Returns the length
  *	available and buffer pointer to the space which is now allocated and
  *	accounted for as ready for normal characters. This is used for drivers
  *	that need their own block copy routines into the buffer. There is no
  *	guarantee the buffer is a DMA target!
+ *
+ *	Locking: May call functions taking tty->buf.lock
  */
 
 int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars, size_t size)
@@ -427,12 +511,20 @@ int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars, size_
 
 EXPORT_SYMBOL_GPL(tty_prepare_flip_string);
 
-/*
+/**
+ *	tty_prepare_flip_string_flags	-	make room for characters
+ *	@tty: tty
+ *	@chars: return pointer for character write area
+ *	@flags: return pointer for status flag write area
+ *	@size: desired size
+ *
  *	Prepare a block of space in the buffer for data. Returns the length
  *	available and buffer pointer to the space which is now allocated and
  *	accounted for as ready for characters. This is used for drivers
  *	that need their own block copy routines into the buffer. There is no
  *	guarantee the buffer is a DMA target!
+ *
+ *	Locking: May call functions taking tty->buf.lock
  */
 
 int tty_prepare_flip_string_flags(struct tty_struct *tty, unsigned char **chars, char **flags, size_t size)
@@ -451,10 +543,16 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_string_flags);
 
 
-/*
+/**
+ *	tty_set_termios_ldisc		-	set ldisc field
+ *	@tty: tty structure
+ *	@num: line discipline number
+ *
  *	This is probably overkill for real world processors but
  *	they are not on hot paths so a little discipline won't do 
  *	any harm.
+ *
+ *	Locking: takes termios_sem
  */
  
 static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
@@ -474,6 +572,19 @@ static DEFINE_SPINLOCK(tty_ldisc_lock);
 static DECLARE_WAIT_QUEUE_HEAD(tty_ldisc_wait);
 static struct tty_ldisc tty_ldiscs[NR_LDISCS];	/* line disc dispatch table */
 
+/**
+ *	tty_register_ldisc	-	install a line discipline
+ *	@disc: ldisc number
+ *	@new_ldisc: pointer to the ldisc object
+ *
+ *	Installs a new line discipline into the kernel. The discipline
+ *	is set up as unreferenced and then made available to the kernel
+ *	from this point onwards.
+ *
+ *	Locking:
+ *		takes tty_ldisc_lock to guard against ldisc races
+ */
+
 int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc)
 {
 	unsigned long flags;
@@ -493,6 +604,18 @@ int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc)
 }
 EXPORT_SYMBOL(tty_register_ldisc);
 
+/**
+ *	tty_unregister_ldisc	-	unload a line discipline
+ *	@disc: ldisc number of the discipline to remove; this is the
+ *		only argument, no ldisc object is passed in
+ *
+ *	Remove a line discipline from the kernel providing it is not
+ *	currently in use.
+ *
+ *	Locking:
+ *		takes tty_ldisc_lock to guard against ldisc races
+ */
+
 int tty_unregister_ldisc(int disc)
 {
 	unsigned long flags;
@@ -512,6 +635,19 @@ int tty_unregister_ldisc(int disc)
 }
 EXPORT_SYMBOL(tty_unregister_ldisc);
 
+/**
+ *	tty_ldisc_get		-	take a reference to an ldisc
+ *	@disc: ldisc number
+ *
+ *	Takes a reference to a line discipline. Deals with refcounts and
+ *	module locking counts. Returns NULL if the discipline is not available.
+ *	Returns a pointer to the discipline and bumps the ref count if it is
+ *	available
+ *
+ *	Locking:
+ *		takes tty_ldisc_lock to guard against ldisc races
+ */
+
 struct tty_ldisc *tty_ldisc_get(int disc)
 {
 	unsigned long flags;
@@ -540,6 +676,17 @@ struct tty_ldisc *tty_ldisc_get(int disc)
 
 EXPORT_SYMBOL_GPL(tty_ldisc_get);
 
+/**
+ *	tty_ldisc_put		-	drop ldisc reference
+ *	@disc: ldisc number
+ *
+ *	Drop a reference to a line discipline. Manage refcounts and
+ *	module usage counts
+ *
+ *	Locking:
+ *		takes tty_ldisc_lock to guard against ldisc races
+ */
+
 void tty_ldisc_put(int disc)
 {
 	struct tty_ldisc *ld;
@@ -557,6 +704,19 @@ void tty_ldisc_put(int disc)
 	
 EXPORT_SYMBOL_GPL(tty_ldisc_put);
 
+/**
+ *	tty_ldisc_assign	-	set ldisc on a tty
+ *	@tty: tty to assign
+ *	@ld: line discipline
+ *
+ *	Install an instance of a line discipline into a tty structure. The
+ *	ldisc must have a reference count above zero to ensure it remains.
+ *	The tty instance refcount starts at zero.
+ *
+ *	Locking:
+ *		Caller must hold references
+ */
+
 static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld)
 {
 	tty->ldisc = *ld;
@@ -571,6 +731,8 @@ static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld)
  *	the tty ldisc. Return 0 on failure or 1 on success. This is
  *	used to implement both the waiting and non waiting versions
  *	of tty_ldisc_ref
+ *
+ *	Locking: takes tty_ldisc_lock
  */
 
 static int tty_ldisc_try(struct tty_struct *tty)
@@ -602,6 +764,8 @@ static int tty_ldisc_try(struct tty_struct *tty)
  *	must also be careful not to hold other locks that will deadlock
  *	against a discipline change, such as an existing ldisc reference
  *	(which we check for)
+ *
+ *	Locking: called functions take tty_ldisc_lock
  */
  
 struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty)
@@ -622,6 +786,8 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
  *	Dereference the line discipline for the terminal and take a 
  *	reference to it. If the line discipline is in flux then 
  *	return NULL. Can be called from IRQ and timer functions.
+ *
+ *	Locking: called functions take tty_ldisc_lock
  */
  
 struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty)
@@ -639,6 +805,8 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref);
  *
  *	Undoes the effect of tty_ldisc_ref or tty_ldisc_ref_wait. May
  *	be called in IRQ context.
+ *
+ *	Locking: takes tty_ldisc_lock
  */
  
 void tty_ldisc_deref(struct tty_ldisc *ld)
@@ -683,6 +851,9 @@ static void tty_ldisc_enable(struct tty_struct *tty)
  *
  *	Set the discipline of a tty line. Must be called from a process
  *	context.
+ *
+ *	Locking: takes tty_ldisc_lock.
+ *		called functions take termios_sem
  */
  
 static int tty_set_ldisc(struct tty_struct *tty, int ldisc)
@@ -846,9 +1017,17 @@ restart:
 	return retval;
 }
 
-/*
- * This routine returns a tty driver structure, given a device number
+/**
+ *	get_tty_driver		-	find device of a tty
+ *	@device: device identifier
+ *	@index: returns the index of the tty
+ *
+ *	This routine returns a tty driver structure, given a device number
+ *	and also passes back the index number.
+ *
+ *	Locking: caller must hold tty_mutex
  */
+
 static struct tty_driver *get_tty_driver(dev_t device, int *index)
 {
 	struct tty_driver *p;
@@ -863,11 +1042,17 @@ static struct tty_driver *get_tty_driver(dev_t device, int *index)
 	return NULL;
 }
 
-/*
- * If we try to write to, or set the state of, a terminal and we're
- * not in the foreground, send a SIGTTOU.  If the signal is blocked or
- * ignored, go ahead and perform the operation.  (POSIX 7.2)
+/**
+ *	tty_check_change	-	check for POSIX terminal changes
+ *	@tty: tty to check
+ *
+ *	If we try to write to, or set the state of, a terminal and we're
+ *	not in the foreground, send a SIGTTOU.  If the signal is blocked or
+ *	ignored, go ahead and perform the operation.  (POSIX 7.2)
+ *
+ *	Locking: none
  */
+
 int tty_check_change(struct tty_struct * tty)
 {
 	if (current->signal->tty != tty)
@@ -1005,10 +1190,27 @@ void tty_ldisc_flush(struct tty_struct *tty)
 
 EXPORT_SYMBOL_GPL(tty_ldisc_flush);
 	
-/*
- * This can be called by the "eventd" kernel thread.  That is process synchronous,
- * but doesn't hold any locks, so we need to make sure we have the appropriate
- * locks for what we're doing..
+/**
+ *	do_tty_hangup		-	actual handler for hangup events
+ *	@data: tty device
+ *
+ *	This can be called by the "eventd" kernel thread.  That is process
+ *	synchronous but doesn't hold any locks, so we need to make sure we
+ *	have the appropriate locks for what we're doing.
+ *
+ *	The hangup event clears any pending redirections onto the hung up
+ *	device. It ensures future writes will error and it does the needed
+ *	line discipline hangup and signal delivery. The tty object itself
+ *	remains intact.
+ *
+ *	Locking:
+ *		BKL
+ *		redirect lock for undoing redirection
+ *		file list lock for manipulating list of ttys
+ *		tty_ldisc_lock from called functions
+ *		termios_sem resetting termios data
+ *		tasklist_lock to walk task list for hangup event
+ *
  */
 static void do_tty_hangup(void *data)
 {
@@ -1133,6 +1335,14 @@ static void do_tty_hangup(void *data)
 		fput(f);
 }
 
+/**
+ *	tty_hangup		-	trigger a hangup event
+ *	@tty: tty to hangup
+ *
+ *	A carrier loss (virtual or otherwise) has occurred on this line;
+ *	schedule a hangup sequence to run after this event.
+ */
+
 void tty_hangup(struct tty_struct * tty)
 {
 #ifdef TTY_DEBUG_HANGUP
@@ -1145,6 +1355,15 @@ void tty_hangup(struct tty_struct * tty)
 
 EXPORT_SYMBOL(tty_hangup);
 
+/**
+ *	tty_vhangup		-	process vhangup
+ *	@tty: tty to hangup
+ *
+ *	The user has asked via system call for the terminal to be hung up.
+ *	We do this synchronously so that when the syscall returns the process
+ *	is complete. That guarantee is necessary for security reasons.
+ */
+
 void tty_vhangup(struct tty_struct * tty)
 {
 #ifdef TTY_DEBUG_HANGUP
@@ -1156,6 +1375,14 @@ void tty_vhangup(struct tty_struct * tty)
 }
 EXPORT_SYMBOL(tty_vhangup);
 
+/**
+ *	tty_hung_up_p		-	was tty hung up
+ *	@filp: file pointer of tty
+ *
+ *	Return true if the tty has been subject to a vhangup or a carrier
+ *	loss
+ */
+
 int tty_hung_up_p(struct file * filp)
 {
 	return (filp->f_op == &hung_up_tty_fops);
@@ -1163,19 +1390,28 @@ int tty_hung_up_p(struct file * filp)
 
 EXPORT_SYMBOL(tty_hung_up_p);
 
-/*
- * This function is typically called only by the session leader, when
- * it wants to disassociate itself from its controlling tty.
+/**
+ *	disassociate_ctty	-	disconnect controlling tty
+ *	@on_exit: true if exiting so need to "hang up" the session
  *
- * It performs the following functions:
+ *	This function is typically called only by the session leader, when
+ *	it wants to disassociate itself from its controlling tty.
+ *
+ *	It performs the following functions:
  * 	(1)  Sends a SIGHUP and SIGCONT to the foreground process group
  * 	(2)  Clears the tty from being controlling the session
  * 	(3)  Clears the controlling tty for all processes in the
  * 		session group.
  *
- * The argument on_exit is set to 1 if called when a process is
- * exiting; it is 0 if called by the ioctl TIOCNOTTY.
+ *	The argument on_exit is set to 1 if called when a process is
+ *	exiting; it is 0 if called by the ioctl TIOCNOTTY.
+ *
+ *	Locking: tty_mutex is taken to protect current->signal->tty
+ *		BKL is taken for hysterical raisins
+ *		Tasklist lock is taken (under tty_mutex) to walk process
+ *		lists for the session.
  */
+
 void disassociate_ctty(int on_exit)
 {
 	struct tty_struct *tty;
@@ -1222,6 +1458,25 @@ void disassociate_ctty(int on_exit)
 	unlock_kernel();
 }
 
+
+/**
+ *	stop_tty	-	propagate flow control
+ *	@tty: tty to stop
+ *
+ *	Perform flow control to the driver. For PTY/TTY pairs we
+ *	must also propagate the TIOCPKT status. May be called
+ *	on an already stopped device and will not re-call the driver
+ *	method.
+ *
+ *	This functionality is used by both the line disciplines for
+ *	halting incoming flow and by the driver. It may therefore be
+ *	called from any context, may be under the tty atomic_write_lock
+ *	but not always.
+ *
+ *	Locking:
+ *		Broken. Relies on BKL which is unsafe here.
+ */
+
 void stop_tty(struct tty_struct *tty)
 {
 	if (tty->stopped)
@@ -1238,6 +1493,19 @@ void stop_tty(struct tty_struct *tty)
 
 EXPORT_SYMBOL(stop_tty);
 
+/**
+ *	start_tty	-	propagate flow control
+ *	@tty: tty to start
+ *
+ *	Start a tty that has been stopped if at all possible. Perform
+ *	any necessary wakeups and propagate the TIOCPKT status. If
+ *	the tty was previously stopped and is being started then the
+ *	driver start method is invoked and the line discipline woken.
+ *
+ *	Locking:
+ *		Broken. Relies on BKL which is unsafe here.
+ */
+
 void start_tty(struct tty_struct *tty)
 {
 	if (!tty->stopped || tty->flow_stopped)
@@ -1258,6 +1526,23 @@ void start_tty(struct tty_struct *tty)
 
 EXPORT_SYMBOL(start_tty);
 
+/**
+ *	tty_read	-	read method for tty device files
+ *	@file: pointer to tty file
+ *	@buf: user buffer
+ *	@count: size of user buffer
+ *	@ppos: unused
+ *
+ *	Perform the read system call function on this terminal device. Checks
+ *	for hung up devices before calling the line discipline method.
+ *
+ *	Locking:
+ *		Locks the line discipline internally while needed
+ *		For historical reasons the line discipline read method is
+ *	invoked under the BKL. This will go away in time so do not rely on it
+ *	in new code. Multiple read calls may be outstanding in parallel.
+ */
+
 static ssize_t tty_read(struct file * file, char __user * buf, size_t count, 
 			loff_t *ppos)
 {
@@ -1302,6 +1587,7 @@ static inline ssize_t do_tty_write(
 	ssize_t ret = 0, written = 0;
 	unsigned int chunk;
 	
+	/* FIXME: O_NDELAY ... */
 	if (mutex_lock_interruptible(&tty->atomic_write_lock)) {
 		return -ERESTARTSYS;
 	}
@@ -1318,6 +1604,9 @@ static inline ssize_t do_tty_write(
 	 * layer has problems with bigger chunks. It will
 	 * claim to be able to handle more characters than
 	 * it actually does.
+	 *
+	 * FIXME: This can probably go away now except that 64K chunks
+	 * are too likely to fail unless switched to vmalloc...
 	 */
 	chunk = 2048;
 	if (test_bit(TTY_NO_WRITE_SPLIT, &tty->flags))
@@ -1375,6 +1664,24 @@ static inline ssize_t do_tty_write(
 }
 
 
+/**
+ *	tty_write		-	write method for tty device file
+ *	@file: tty file pointer
+ *	@buf: user data to write
+ *	@count: bytes to write
+ *	@ppos: unused
+ *
+ *	Write data to a tty device via the line discipline.
+ *
+ *	Locking:
+ *		Locks the line discipline as required
+ *		Writes to the tty driver are serialized by the atomic_write_lock
+ *	and are then processed in chunks to the device. The line discipline
+ *	write method will not be invoked in parallel for each device
+ *		The line discipline write method is called under the big
+ *	kernel lock for historical reasons. New code should not rely on this.
+ */
+
 static ssize_t tty_write(struct file * file, const char __user * buf, size_t count,
 			 loff_t *ppos)
 {
@@ -1422,7 +1729,18 @@ ssize_t redirected_tty_write(struct file * file, const char __user * buf, size_t
 
 static char ptychar[] = "pqrstuvwxyzabcde";
 
-static inline void pty_line_name(struct tty_driver *driver, int index, char *p)
+/**
+ *	pty_line_name	-	generate name for a pty
+ *	@driver: the tty driver in use
+ *	@index: the minor number
+ *	@p: output buffer of at least 6 bytes
+ *
+ *	Generate a name from a driver reference and write it to the output
+ *	buffer.
+ *
+ *	Locking: None
+ */
+static void pty_line_name(struct tty_driver *driver, int index, char *p)
 {
 	int i = index + driver->name_base;
 	/* ->name is initialized to "ttyp", but "tty" is expected */
@@ -1431,24 +1749,53 @@ static inline void pty_line_name(struct tty_driver *driver, int index, char *p)
 			ptychar[i >> 4 & 0xf], i & 0xf);
 }
 
-static inline void tty_line_name(struct tty_driver *driver, int index, char *p)
+/**
+ *	tty_line_name	-	generate name for a tty
+ *	@driver: the tty driver in use
+ *	@index: the minor number
+ *	@p: output buffer of at least 7 bytes
+ *
+ *	Generate a name from a driver reference and write it to the output
+ *	buffer.
+ *
+ *	Locking: None
+ */
+static void tty_line_name(struct tty_driver *driver, int index, char *p)
 {
 	sprintf(p, "%s%d", driver->name, index + driver->name_base);
 }
 
-/*
+/**
+ *	init_dev		-	initialise a tty device
+ *	@driver: tty driver we are opening a device on
+ *	@idx: device index
+ *	@tty: returned tty structure
+ *
+ *	Prepare a tty device. This may not be a "new" clean device but
+ *	could also be an active device. The pty drivers require special
+ *	handling because of this.
+ *
+ *	Locking:
+ *		The function is called under the tty_mutex, which
+ *	protects us from the tty struct or driver itself going away.
+ *
+ *	On exit the tty device has the line discipline attached and
+ *	a reference count of 1. If a pair was created for pty/tty use
+ *	and the other was a pty master then it too has a reference count of 1.
+ *
  * WSH 06/09/97: Rewritten to remove races and properly clean up after a
  * failed open.  The new code protects the open with a mutex, so it's
  * really quite straightforward.  The mutex locking can probably be
  * relaxed for the (most common) case of reopening a tty.
  */
+
 static int init_dev(struct tty_driver *driver, int idx,
 	struct tty_struct **ret_tty)
 {
 	struct tty_struct *tty, *o_tty;
 	struct termios *tp, **tp_loc, *o_tp, **o_tp_loc;
 	struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc;
-	int retval=0;
+	int retval = 0;
 
 	/* check whether we're reopening an existing tty */
 	if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
@@ -1662,10 +2009,20 @@ release_mem_out:
 	goto end_init;
 }
 
-/*
- * Releases memory associated with a tty structure, and clears out the
- * driver table slots.
+/**
+ *	release_mem		-	release tty structure memory
+ *
+ *	Releases memory associated with a tty structure, and clears out the
+ *	driver table slots. This function is called when a device is no longer
+ *	in use. It also gets called when setup of a device fails.
+ *
+ *	Locking:
+ *		tty_mutex - sometimes only
+ *		takes the file list lock internally when working on the list
+ *	of ttys that the driver keeps.
+ *		FIXME: should we require tty_mutex is held here ??
  */
+
 static void release_mem(struct tty_struct *tty, int idx)
 {
 	struct tty_struct *o_tty;
@@ -2006,18 +2363,27 @@ static void release_dev(struct file * filp)
 
 }
 
-/*
- * tty_open and tty_release keep up the tty count that contains the
- * number of opens done on a tty. We cannot use the inode-count, as
- * different inodes might point to the same tty.
+/**
+ *	tty_open		-	open a tty device
+ *	@inode: inode of device file
+ *	@filp: file pointer to tty
  *
- * Open-counting is needed for pty masters, as well as for keeping
- * track of serial lines: DTR is dropped when the last close happens.
- * (This is not done solely through tty->count, now.  - Ted 1/27/92)
+ *	tty_open and tty_release keep up the tty count that contains the
+ *	number of opens done on a tty. We cannot use the inode-count, as
+ *	different inodes might point to the same tty.
  *
- * The termios state of a pty is reset on first open so that
- * settings don't persist across reuse.
+ *	Open-counting is needed for pty masters, as well as for keeping
+ *	track of serial lines: DTR is dropped when the last close happens.
+ *	(This is not done solely through tty->count, now.  - Ted 1/27/92)
+ *
+ *	The termios state of a pty is reset on first open so that
+ *	settings don't persist across reuse.
+ *
+ *	Locking: tty_mutex protects current->signal->tty, get_tty_driver and
+ *		init_dev work. tty->count should protect the rest.
+ *		task_lock is held to update task details for sessions
  */
+
 static int tty_open(struct inode * inode, struct file * filp)
 {
 	struct tty_struct *tty;
@@ -2132,6 +2498,18 @@ got_driver:
 }
 
 #ifdef CONFIG_UNIX98_PTYS
+/**
+ *	ptmx_open		-	open a unix 98 pty master
+ *	@inode: inode of device file
+ *	@filp: file pointer to tty
+ *
+ *	Allocate a unix98 pty master device from the ptmx driver.
+ *
+ *	Locking: tty_mutex protects the init_dev work. tty->count should
+ *		protect the rest.
+ *		allocated_ptys_lock handles the list of free pty numbers
+ */
+
 static int ptmx_open(struct inode * inode, struct file * filp)
 {
 	struct tty_struct *tty;
@@ -2191,6 +2569,18 @@ out:
 }
 #endif
 
+/**
+ *	tty_release		-	vfs callback for close
+ *	@inode: inode of tty
+ *	@filp: file pointer for handle to tty
+ *
+ *	Called the last time each file handle is closed that references
+ *	this tty. There may however be several such references.
+ *
+ *	Locking:
+ *		Takes bkl. See release_dev
+ */
+
 static int tty_release(struct inode * inode, struct file * filp)
 {
 	lock_kernel();
@@ -2199,7 +2589,18 @@ static int tty_release(struct inode * inode, struct file * filp)
 	return 0;
 }
 
-/* No kernel lock held - fine */
+/**
+ *	tty_poll	-	check tty status
+ *	@filp: file being polled
+ *	@wait: poll wait structures to update
+ *
+ *	Call the line discipline polling method to obtain the poll
+ *	status of the device.
+ *
+ *	Locking: locks called line discipline but ldisc poll method
+ *	may be re-entered freely by other callers.
+ */
+
 static unsigned int tty_poll(struct file * filp, poll_table * wait)
 {
 	struct tty_struct * tty;
@@ -2243,6 +2644,21 @@ static int tty_fasync(int fd, struct file * filp, int on)
 	return 0;
 }
 
+/**
+ *	tiocsti			-	fake input character
+ *	@tty: tty to fake input into
+ *	@p: pointer to character
+ *
+ *	Fake input to a tty device. Does the necessary locking and
+ *	input management.
+ *
+ *	FIXME: does not honour flow control ??
+ *
+ *	Locking:
+ *		Called functions take tty_ldisc_lock
+ *		current->signal->tty check is safe without locks
+ */
+
 static int tiocsti(struct tty_struct *tty, char __user *p)
 {
 	char ch, mbz = 0;
@@ -2258,6 +2674,18 @@ static int tiocsti(struct tty_struct *tty, char __user *p)
 	return 0;
 }
 
+/**
+ *	tiocgwinsz		-	implement window query ioctl
+ *	@tty: tty
+ *	@arg: user buffer for result
+ *
+ *	Copies the kernel idea of the window size into the user buffer. No
+ *	locking is done.
+ *
+ *	FIXME: Returning random values racing a window size set is wrong
+ *	should lock here against that
+ */
+
 static int tiocgwinsz(struct tty_struct *tty, struct winsize __user * arg)
 {
 	if (copy_to_user(arg, &tty->winsize, sizeof(*arg)))
@@ -2265,6 +2693,24 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user * arg)
 	return 0;
 }
 
+/**
+ *	tiocswinsz		-	implement window size set ioctl
+ *	@tty: tty
+ *	@arg: user buffer holding the new window size
+ *
+ *	Copies the user idea of the window size to the kernel. Traditionally
+ *	this is just advisory information but for the Linux console it
+ *	actually has driver level meaning and triggers a VC resize.
+ *
+ *	Locking:
+ *		The console_sem is used to ensure we do not try and resize
+ *	the console twice at once.
+ *	FIXME: Two racing size sets may leave the console and kernel
+ *		parameters disagreeing. Is this exploitable ?
+ *	FIXME: Random values racing a window size get is wrong
+ *	should lock here against that
+ */
+
 static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
 	struct winsize __user * arg)
 {
@@ -2294,6 +2740,15 @@ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
 	return 0;
 }
 
+/**
+ *	tioccons	-	allow admin to move logical console
+ *	@file: the file to become console
+ *
+ *	Allow the administrator to move the redirected console device
+ *
+ *	Locking: uses redirect_lock to guard the redirect information
+ */
+
 static int tioccons(struct file *file)
 {
 	if (!capable(CAP_SYS_ADMIN))
@@ -2319,6 +2774,17 @@ static int tioccons(struct file *file)
 	return 0;
 }
 
+/**
+ *	fionbio		-	non blocking ioctl
+ *	@file: file to set blocking value
+ *	@p: user parameter
+ *
+ *	Historical tty interfaces had a blocking control ioctl before
+ *	the generic functionality existed. This piece of history is preserved
+ *	in the expected tty API of posix OS's.
+ *
+ *	Locking: none, the open file handle ensures it won't go away.
+ */
 
 static int fionbio(struct file *file, int __user *p)
 {
@@ -2334,6 +2800,23 @@ static int fionbio(struct file *file, int __user *p)
 	return 0;
 }
 
+/**
+ *	tiocsctty	-	set controlling tty
+ *	@tty: tty structure
+ *	@arg: user argument
+ *
+ *	This ioctl is used to manage job control. It permits a session
+ *	leader to set this tty as the controlling tty for the session.
+ *
+ *	Locking:
+ *		Takes tasklist lock internally to walk sessions
+ *		Takes task_lock() when updating signal->tty
+ *
+ *	FIXME: tty_mutex is needed to protect signal->tty references.
+ *	FIXME: why task_lock on the signal->tty reference ??
+ *
+ */
+
 static int tiocsctty(struct tty_struct *tty, int arg)
 {
 	struct task_struct *p;
@@ -2374,6 +2857,18 @@ static int tiocsctty(struct tty_struct *tty, int arg)
 	return 0;
 }
 
+/**
+ *	tiocgpgrp		-	get process group
+ *	@tty: tty passed by user
+ *	@real_tty: tty side of the tty passed by the user if a pty else the tty
+ *	@p: returned pid
+ *
+ *	Obtain the process group of the tty. If there is no process group
+ *	return an error.
+ *
+ *	Locking: none. Reference to ->signal->tty is safe.
+ */
+
 static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
 {
 	/*
@@ -2385,6 +2880,20 @@ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
 	return put_user(real_tty->pgrp, p);
 }
 
+/**
+ *	tiocspgrp		-	attempt to set process group
+ *	@tty: tty passed by user
+ *	@real_tty: tty side device matching tty passed by user
+ *	@p: pid pointer
+ *
+ *	Set the process group of the tty to the session passed. Only
+ *	permitted where the tty session is our session.
+ *
+ *	Locking: None
+ *
+ *	FIXME: current->signal->tty referencing is unsafe.
+ */
+
 static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
 {
 	pid_t pgrp;
@@ -2408,6 +2917,18 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
 	return 0;
 }
 
+/**
+ *	tiocgsid		-	get session id
+ *	@tty: tty passed by user
+ *	@real_tty: tty side of the tty passed by the user if a pty else the tty
+ *	@p: pointer to returned session id
+ *
+ *	Obtain the session id of the tty. If there is no session
+ *	return an error.
+ *
+ *	Locking: none. Reference to ->signal->tty is safe.
+ */
+
 static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
 {
 	/*
@@ -2421,6 +2942,16 @@ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t _
 	return put_user(real_tty->session, p);
 }
 
+/**
+ *	tiocsetd	-	set line discipline
+ *	@tty: tty device
+ *	@p: pointer to user data
+ *
+ *	Set the line discipline according to user request.
+ *
+ *	Locking: see tty_set_ldisc, this function is just a helper
+ */
+
 static int tiocsetd(struct tty_struct *tty, int __user *p)
 {
 	int ldisc;
@@ -2430,6 +2961,21 @@ static int tiocsetd(struct tty_struct *tty, int __user *p)
 	return tty_set_ldisc(tty, ldisc);
 }
 
+/**
+ *	send_break	-	perform a timed break
+ *	@tty: device to break on
+ *	@duration: timeout in ms
+ *
+ *	Perform a timed break on hardware that lacks its own driver level
+ *	timed break functionality.
+ *
+ *	Locking:
+ *		None
+ *
+ *	FIXME:
+ *		What if two overlap
+ */
+
 static int send_break(struct tty_struct *tty, unsigned int duration)
 {
 	tty->driver->break_ctl(tty, -1);
@@ -2442,8 +2988,19 @@ static int send_break(struct tty_struct *tty, unsigned int duration)
 	return 0;
 }
 
-static int
-tty_tiocmget(struct tty_struct *tty, struct file *file, int __user *p)
+/**
+ *	tty_tiocmget		-	get modem status
+ *	@tty: tty device
+ *	@file: user file pointer
+ *	@p: pointer to result
+ *
+ *	Obtain the modem status bits from the tty driver if the feature
+ *	is supported. Return -EINVAL if it is not available.
+ *
+ *	Locking: none (up to the driver)
+ */
+
+static int tty_tiocmget(struct tty_struct *tty, struct file *file, int __user *p)
 {
 	int retval = -EINVAL;
 
@@ -2456,8 +3013,20 @@ tty_tiocmget(struct tty_struct *tty, struct file *file, int __user *p)
 	return retval;
 }
 
-static int
-tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int cmd,
+/**
+ *	tty_tiocmset		-	set modem status
+ *	@tty: tty device
+ *	@file: user file pointer
+ *	@cmd: command - clear bits, set bits or set all
+ *	@p: pointer to desired bits
+ *
+ *	Set the modem status bits from the tty driver if the feature
+ *	is supported. Return -EINVAL if it is not available.
+ *
+ *	Locking: none (up to the driver)
+ */
+
+static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int cmd,
 	     unsigned __user *p)
 {
 	int retval = -EINVAL;
@@ -2573,6 +3142,7 @@ int tty_ioctl(struct inode * inode, struct file * file,
 			clear_bit(TTY_EXCLUSIVE, &tty->flags);
 			return 0;
 		case TIOCNOTTY:
+			/* FIXME: task lock or tty_mutex ? */
 			if (current->signal->tty != tty)
 				return -ENOTTY;
 			if (current->signal->leader)
@@ -2753,9 +3323,16 @@ void do_SAK(struct tty_struct *tty)
 
 EXPORT_SYMBOL(do_SAK);
 
-/*
- * This routine is called out of the software interrupt to flush data
- * from the buffer chain to the line discipline.
+/**
+ *	flush_to_ldisc	-	flush buffered data to the ldisc
+ *	@private_: tty structure passed from work queue.
+ *
+ *	This routine is called out of the software interrupt to flush data
+ *	from the buffer chain to the line discipline.
+ *
+ *	Locking: holds tty->buf.lock to guard buffer list. Drops the lock
+ *	while invoking the line discipline receive_buf method. The
+ *	receive_buf method is single threaded for each tty instance.
  */
  
 static void flush_to_ldisc(void *private_)
@@ -2831,6 +3408,8 @@ static int n_baud_table = ARRAY_SIZE(baud_table);
  *	Convert termios baud rate data into a speed. This should be called
  *	with the termios lock held if this termios is a terminal termios
  *	structure. May change the termios data.
+ *
+ *	Locking: none
  */
  
 int tty_termios_baud_rate(struct termios *termios)
@@ -2859,6 +3438,8 @@ EXPORT_SYMBOL(tty_termios_baud_rate);
  *	Returns the baud rate as an integer for this terminal. The
  *	termios lock must be held by the caller and the terminal bit
  *	flags may be updated.
+ *
+ *	Locking: none
  */
  
 int tty_get_baud_rate(struct tty_struct *tty)
@@ -2888,6 +3469,8 @@ EXPORT_SYMBOL(tty_get_baud_rate);
  *
  *	In the event of the queue being busy for flipping the work will be
  *	held off and retried later.
+ *
+ *	Locking: tty buffer lock. Driver locks in low latency mode.
  */
 
 void tty_flip_buffer_push(struct tty_struct *tty)
@@ -2907,9 +3490,16 @@ void tty_flip_buffer_push(struct tty_struct *tty)
 EXPORT_SYMBOL(tty_flip_buffer_push);
 
 
-/*
- * This subroutine initializes a tty structure.
+/**
+ *	initialize_tty_struct	-	set up a newly allocated tty
+ *	@tty: tty to initialize
+ *
+ *	This subroutine initializes a tty structure that has been newly
+ *	allocated.
+ *
+ *	Locking: none - tty in question must not be exposed at this point
  */
+
 static void initialize_tty_struct(struct tty_struct *tty)
 {
 	memset(tty, 0, sizeof(struct tty_struct));
@@ -2935,6 +3525,7 @@ static void initialize_tty_struct(struct tty_struct *tty)
 /*
  * The default put_char routine if the driver did not define one.
  */
+
 static void tty_default_put_char(struct tty_struct *tty, unsigned char ch)
 {
 	tty->driver->write(tty, &ch, 1);
@@ -2943,19 +3534,23 @@ static void tty_default_put_char(struct tty_struct *tty, unsigned char ch)
 static struct class *tty_class;
 
 /**
- * tty_register_device - register a tty device
- * @driver: the tty driver that describes the tty device
- * @index: the index in the tty driver for this tty device
- * @device: a struct device that is associated with this tty device.
- *	This field is optional, if there is no known struct device for this
- *	tty device it can be set to NULL safely.
+ *	tty_register_device - register a tty device
+ *	@driver: the tty driver that describes the tty device
+ *	@index: the index in the tty driver for this tty device
+ *	@device: a struct device that is associated with this tty device.
+ *		This field is optional, if there is no known struct device
+ *		for this tty device it can be set to NULL safely.
  *
- * Returns a pointer to the class device (or ERR_PTR(-EFOO) on error).
+ *	Returns a pointer to the class device (or ERR_PTR(-EFOO) on error).
  *
- * This call is required to be made to register an individual tty device if
- * the tty driver's flags have the TTY_DRIVER_DYNAMIC_DEV bit set.  If that
- * bit is not set, this function should not be called by a tty driver.
+ *	This call is required to be made to register an individual tty device
+ *	if the tty driver's flags have the TTY_DRIVER_DYNAMIC_DEV bit set.  If
+ *	that bit is not set, this function should not be called by a tty
+ *	driver.
+ *
+ *	Locking: ??
  */
+
 struct class_device *tty_register_device(struct tty_driver *driver,
 					 unsigned index, struct device *device)
 {
@@ -2977,13 +3572,16 @@ struct class_device *tty_register_device(struct tty_driver *driver,
 }
 
 /**
- * tty_unregister_device - unregister a tty device
- * @driver: the tty driver that describes the tty device
- * @index: the index in the tty driver for this tty device
+ *	tty_unregister_device - unregister a tty device
+ *	@driver: the tty driver that describes the tty device
+ *	@index: the index in the tty driver for this tty device
  *
- * If a tty device is registered with a call to tty_register_device() then
- * this function must be made when the tty device is gone.
+ *	If a tty device is registered with a call to tty_register_device() then
+ *	this function must be called when the tty device is gone.
+ *
+ *	Locking: ??
  */
+
 void tty_unregister_device(struct tty_driver *driver, unsigned index)
 {
 	class_device_destroy(tty_class, MKDEV(driver->major, driver->minor_start) + index);
diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
index f19cf9d..4ad47d3 100644
--- a/drivers/char/tty_ioctl.c
+++ b/drivers/char/tty_ioctl.c
@@ -36,6 +36,18 @@
 #define TERMIOS_WAIT	2
 #define TERMIOS_TERMIO	4
 
+
+/**
+ *	tty_wait_until_sent	-	wait for I/O to finish
+ *	@tty: tty we are waiting for
+ *	@timeout: how long we will wait
+ *
+ *	Wait for characters pending in a tty driver to hit the wire, or
+ *	for a timeout to occur (e.g. due to flow control).
+ *
+ *	Locking: none
+ */
+
 void tty_wait_until_sent(struct tty_struct * tty, long timeout)
 {
 	DECLARE_WAITQUEUE(wait, current);
@@ -94,6 +106,18 @@ static void unset_locked_termios(struct termios *termios,
 			old->c_cc[i] : termios->c_cc[i];
 }
 
+/**
+ *	change_termios		-	update termios values
+ *	@tty: tty to update
+ *	@new_termios: desired new value
+ *
+ *	Perform updates to the termios values set on this terminal. There
+ *	is a bit of layering violation here with n_tty in terms of the
+ *	internal knowledge of this function.
+ *
+ *	Locking: termios_sem
+ */
+
 static void change_termios(struct tty_struct * tty, struct termios * new_termios)
 {
 	int canon_change;
@@ -155,6 +179,19 @@ static void change_termios(struct tty_struct * tty, struct termios * new_termios
 	up(&tty->termios_sem);
 }
 
+/**
+ *	set_termios		-	set termios values for a tty
+ *	@tty: terminal device
+ *	@arg: user data
+ *	@opt: option information
+ *
+ *	Helper function to prepare termios data and run other necessary
+ *	functions before using change_termios to do the actual changes.
+ *
+ *	Locking:
+ *		Called functions take ldisc and termios_sem locks
+ */
+
 static int set_termios(struct tty_struct * tty, void __user *arg, int opt)
 {
 	struct termios tmp_termios;
@@ -284,6 +321,17 @@ static void set_sgflags(struct termios * termios, int flags)
 	}
 }
 
+/**
+ *	set_sgttyb		-	set legacy terminal values
+ *	@tty: tty structure
+ *	@sgttyb: pointer to old style terminal structure
+ *
+ *	Updates a terminal from the legacy BSD style terminal information
+ *	structure.
+ *
+ *	Locking: termios_sem
+ */
+
 static int set_sgttyb(struct tty_struct * tty, struct sgttyb __user * sgttyb)
 {
 	int retval;
@@ -369,9 +417,16 @@ static int set_ltchars(struct tty_struct * tty, struct ltchars __user * ltchars)
 }
 #endif
 
-/*
- * Send a high priority character to the tty.
+/**
+ *	send_prio_char		-	send priority character
+ *
+ *	Send a high priority character to the tty even if stopped
+ *
+ *	Locking: none
+ *
+ *	FIXME: overlapping calls with start/stop tty lose state of tty
  */
+
 static void send_prio_char(struct tty_struct *tty, char ch)
 {
 	int	was_stopped = tty->stopped;
-- 
cgit v0.10.2


From 01da5fd83d6b2c5e36b77539f6cbdd8f49849225 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Sun, 27 Aug 2006 01:24:02 -0700
Subject: [PATCH] Fix tty layer DoS and comment relevant code

Unlike the other tty comment patch this one has code changes.  Specifically
it limits the queue size for a tty to 64K characters (128Kbytes) worst case
even if the tty is ignoring tty->throttle.  This is because certain drivers
don't honour the throttle value correctly, although it is a useful
safeguard anyway.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 2cef982..bb0d919 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -275,6 +275,17 @@ static int check_tty_count(struct tty_struct *tty, const char *routine)
  *	Locking: none
  */
 
+
+/**
+ *	tty_buffer_free_all		-	free buffers used by a tty
+ *	@tty: tty to free from
+ *
+ *	Remove all the buffers pending on a tty whether queued with data
+ *	or in the free ring. Must be called when the tty is no longer in use
+ *
+ *	Locking: none
+ */
+
 static void tty_buffer_free_all(struct tty_struct *tty)
 {
 	struct tty_buffer *thead;
@@ -287,19 +298,47 @@ static void tty_buffer_free_all(struct tty_struct *tty)
 		kfree(thead);
 	}
 	tty->buf.tail = NULL;
+	tty->buf.memory_used = 0;
 }
 
+/**
+ *	tty_buffer_init		-	prepare a tty buffer structure
+ *	@tty: tty to initialise
+ *
+ *	Set up the initial state of the buffer management for a tty device.
+ *	Must be called before the other tty buffer functions are used.
+ *
+ *	Locking: none
+ */
+
 static void tty_buffer_init(struct tty_struct *tty)
 {
 	spin_lock_init(&tty->buf.lock);
 	tty->buf.head = NULL;
 	tty->buf.tail = NULL;
 	tty->buf.free = NULL;
+	tty->buf.memory_used = 0;
 }
 
-static struct tty_buffer *tty_buffer_alloc(size_t size)
+/**
+ *	tty_buffer_alloc	-	allocate a tty buffer
+ *	@tty: tty device
+ *	@size: desired size (characters)
+ *
+ *	Allocate a new tty buffer to hold the desired number of characters.
+ *	Return NULL if out of memory or the allocation would exceed the
+ *	per device queue limit.
+ *
+ *	Locking: Caller must hold tty->buf.lock
+ */
+
+static struct tty_buffer *tty_buffer_alloc(struct tty_struct *tty, size_t size)
 {
-	struct tty_buffer *p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
+	struct tty_buffer *p;
+
+	if (tty->buf.memory_used + size > 65536)
+		return NULL;
+	p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
 	if(p == NULL)
 		return NULL;
 	p->used = 0;
@@ -309,17 +348,27 @@ static struct tty_buffer *tty_buffer_alloc(size_t size)
 	p->read = 0;
 	p->char_buf_ptr = (char *)(p->data);
 	p->flag_buf_ptr = (unsigned char *)p->char_buf_ptr + size;
-/* 	printk("Flip create %p\n", p); */
+	tty->buf.memory_used += size;
 	return p;
 }
 
-/* Must be called with the tty_read lock held. This needs to acquire strategy
-   code to decide if we should kfree or relink a given expired buffer */
+/**
+ *	tty_buffer_free		-	free a tty buffer
+ *	@tty: tty owning the buffer
+ *	@b: the buffer to free
+ *
+ *	Free a tty buffer, or add it to the free list according to our
+ *	internal strategy
+ *
+ *	Locking: Caller must hold tty->buf.lock
+ */
 
 static void tty_buffer_free(struct tty_struct *tty, struct tty_buffer *b)
 {
 	/* Dumb strategy for now - should keep some stats */
-/* 	printk("Flip dispose %p\n", b); */
+	tty->buf.memory_used -= b->size;
+	WARN_ON(tty->buf.memory_used < 0);
+
 	if(b->size >= 512)
 		kfree(b);
 	else {
@@ -328,6 +377,18 @@ static void tty_buffer_free(struct tty_struct *tty, struct tty_buffer *b)
 	}
 }
 
+/**
+ *	tty_buffer_find		-	find a free tty buffer
+ *	@tty: tty owning the buffer
+ *	@size: characters wanted
+ *
+ *	Locate an existing suitable tty buffer or if we are lacking one then
+ *	allocate a new one. We round our buffers off in 256 character chunks
+ *	to get better allocation behaviour.
+ *
+ *	Locking: Caller must hold tty->buf.lock
+ */
+
 static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size)
 {
 	struct tty_buffer **tbh = &tty->buf.free;
@@ -339,20 +400,28 @@ static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size)
 			t->used = 0;
 			t->commit = 0;
 			t->read = 0;
-			/* DEBUG ONLY */
-/*			memset(t->data, '*', size); */
-/* 			printk("Flip recycle %p\n", t); */
+			tty->buf.memory_used += t->size;
 			return t;
 		}
 		tbh = &((*tbh)->next);
 	}
 	/* Round the buffer size out */
 	size = (size + 0xFF) & ~ 0xFF;
-	return tty_buffer_alloc(size);
+	return tty_buffer_alloc(tty, size);
 	/* Should possibly check if this fails for the largest buffer we
 	   have queued and recycle that ? */
 }
 
+/**
+ *	tty_buffer_request_room		-	grow tty buffer if needed
+ *	@tty: tty structure
+ *	@size: size desired
+ *
+ *	Make at least size bytes of linear space available for the tty
+ *	buffer. If we fail return the size we managed to find.
+ *
+ *	Locking: Takes tty->buf.lock
+ */
 int tty_buffer_request_room(struct tty_struct *tty, size_t size)
 {
 	struct tty_buffer *b, *n;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index e421d5e..04827ca 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -59,6 +59,7 @@ struct tty_bufhead {
 	struct tty_buffer *head;	/* Queue head */
 	struct tty_buffer *tail;	/* Active buffer */
 	struct tty_buffer *free;	/* Free queue head */
+	int memory_used;		/* Buffer space used excluding free queue */
 };
 /*
  * The pty uses char_buf and flag_buf as a contiguous buffer
-- 
cgit v0.10.2


From 60d4684068ff1eec78f55b5888d0bd2d4cca1520 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Sun, 27 Aug 2006 20:41:48 -0700
Subject: Linux v2.6.18-rc5


diff --git a/Makefile b/Makefile
index 8406d02..33559b5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 18
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME=Crazed Snow-Weasel
 
 # *DOCUMENTATION*
-- 
cgit v0.10.2